Sharder: add more logs during container cleaving.

Add more logs for each important cleaving step to help
investigate some slow cleaving processes.

Also fixed some misleading test names.

Co-Authored-By: Alistair Coles <alistairncoles@gmail.com>
Change-Id: I4727dcb6268c68c04d86e109aa14f54234a656f9
This commit is contained in:
Jianjian Huo 2022-10-25 10:22:45 -07:00
parent 61cc0573e3
commit c4ad66b494
2 changed files with 45 additions and 23 deletions

View File

@ -1777,8 +1777,9 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
self._send_shard_ranges(
broker.root_account, broker.root_container, created_ranges)
self.logger.info(
"Completed creating shard range containers: %d created.",
len(created_ranges))
"Completed creating shard range containers: %d created, "
"from sharding container %s",
len(created_ranges), quote(broker.path))
return len(created_ranges)
def _cleave_shard_broker(self, broker, cleaving_context, shard_range,
@ -1967,8 +1968,8 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
own_shard_range = broker.get_own_shard_range()
cleaving_context.cursor = own_shard_range.lower_str
cleaving_context.ranges_todo = len(ranges_todo)
self.logger.debug('Starting to cleave (%s todo): %s',
cleaving_context.ranges_todo, quote(broker.path))
self.logger.info('Starting to cleave (%s todo): %s',
cleaving_context.ranges_todo, quote(broker.path))
own_shard_range = broker.get_own_shard_range(no_default=True)
if own_shard_range is None:
@ -2173,9 +2174,14 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
ShardRange.SHRUNK):
if broker.get_shard_ranges():
# container has been given shard ranges rather than
# found them e.g. via replication or a shrink event
# found them e.g. via replication or a shrink event,
# or manually triggered cleaving.
if broker.set_sharding_state():
state = SHARDING
self.logger.info('Kick off container cleaving on %s, '
'own shard range in state %r',
quote(broker.path),
own_shard_range.state_text)
elif is_leader:
if broker.set_sharding_state():
state = SHARDING
@ -2202,14 +2208,16 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
cleave_complete = self._cleave(broker)
if cleave_complete:
self.logger.info('Completed cleaving of %s',
quote(broker.path))
if self._complete_sharding(broker):
state = SHARDED
self._increment_stat('visited', 'completed', statsd=True)
self.logger.info(
'Completed cleaving of %s, DB set to sharded state',
quote(broker.path))
else:
self.logger.debug('Remaining in sharding state %s',
quote(broker.path))
self.logger.info(
'Completed cleaving of %s, DB remaining in sharding '
'state', quote(broker.path))
if not broker.is_deleted():
if state == SHARDED and broker.is_root_container():

View File

@ -2112,7 +2112,6 @@ class TestSharder(BaseTestSharder):
os.path.join(self.tempdir, 'sda', 'containers', '0',
db_hash[-3:], db_hash, db_hash + '.db'))
broker.merge_shard_ranges(shard_ranges)
self.assertTrue(broker.set_sharding_state())
old_broker = broker.get_brokers()[0]
node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2',
'index': 0}
@ -2166,6 +2165,12 @@ class TestSharder(BaseTestSharder):
self.assertEqual('', context.cursor)
self.assertEqual(10, context.cleave_to_row)
self.assertEqual(12, context.max_row) # note that max row increased
lines = sharder.logger.get_lines_for_level('info')
self.assertEqual(
["Kick off container cleaving on a/c, own shard range in state "
"'sharding'", "Starting to cleave (2 todo): a/c"], lines[:2])
self.assertIn('Completed cleaving of a/c, DB remaining in '
'sharding state', lines[1:])
lines = sharder.logger.get_lines_for_level('warning')
self.assertIn('Repeat cleaving required', lines[0])
self.assertFalse(lines[1:])
@ -2194,6 +2199,10 @@ class TestSharder(BaseTestSharder):
shard_ranges[1].state = ShardRange.ACTIVE
self._check_shard_range(shard_ranges[1], updated_shard_ranges[1])
self._check_objects(new_objects[1:], expected_shard_dbs[1])
lines = sharder.logger.get_lines_for_level('info')
self.assertEqual('Starting to cleave (2 todo): a/c', lines[0])
self.assertIn('Completed cleaving of a/c, DB set to sharded state',
lines[1:])
self.assertFalse(sharder.logger.get_lines_for_level('warning'))
def test_cleave_multiple_storage_policies(self):
@ -5005,7 +5014,7 @@ class TestSharder(BaseTestSharder):
self.assertFalse(broker.logger.get_lines_for_level('error'))
broker.logger.clear()
def _check_process_broker_sharding_no_others(self, start_state, deleted):
def _check_process_broker_sharding_others(self, start_state, deleted):
# verify that when existing own_shard_range has given state and there
# are other shard ranges then the sharding process will complete
broker = self._make_broker(hash_='hash%s%s' % (start_state, deleted))
@ -5021,6 +5030,7 @@ class TestSharder(BaseTestSharder):
broker.delete_db(next(self.ts_iter).internal)
with self._mock_sharder() as sharder:
# pretend shard containers are created ok so sharding proceeds
with mock.patch.object(
sharder, '_send_shard_ranges', return_value=True):
with mock_timestamp_now_with_iter(self.ts_iter):
@ -5030,33 +5040,36 @@ class TestSharder(BaseTestSharder):
final_own_sr = broker.get_own_shard_range(no_default=True)
self.assertEqual(SHARDED, broker.get_db_state())
self.assertEqual(epoch.normal, parse_db_filename(broker.db_file)[1])
lines = broker.logger.get_lines_for_level('info')
self.assertIn('Completed creating shard range containers: 2 created, '
'from sharding container a/c', lines)
self.assertFalse(broker.logger.get_lines_for_level('warning'))
self.assertFalse(broker.logger.get_lines_for_level('error'))
# self.assertEqual(deleted, broker.is_deleted())
self.assertEqual(deleted, broker.is_deleted())
return own_sr, final_own_sr
def test_process_broker_sharding_with_own_shard_range_no_others(self):
own_sr, final_own_sr = self._check_process_broker_sharding_no_others(
def test_process_broker_sharding_completes_with_own_and_other_ranges(self):
own_sr, final_own_sr = self._check_process_broker_sharding_others(
ShardRange.SHARDING, False)
exp_own_sr = dict(own_sr, state=ShardRange.SHARDED,
meta_timestamp=mock.ANY)
self.assertEqual(exp_own_sr, dict(final_own_sr))
# verify that deleted DBs will be sharded
own_sr, final_own_sr = self._check_process_broker_sharding_no_others(
own_sr, final_own_sr = self._check_process_broker_sharding_others(
ShardRange.SHARDING, True)
exp_own_sr = dict(own_sr, state=ShardRange.SHARDED,
meta_timestamp=mock.ANY)
self.assertEqual(exp_own_sr, dict(final_own_sr))
own_sr, final_own_sr = self._check_process_broker_sharding_no_others(
own_sr, final_own_sr = self._check_process_broker_sharding_others(
ShardRange.SHRINKING, False)
exp_own_sr = dict(own_sr, state=ShardRange.SHRUNK,
meta_timestamp=mock.ANY)
self.assertEqual(exp_own_sr, dict(final_own_sr))
# verify that deleted DBs will be shrunk
own_sr, final_own_sr = self._check_process_broker_sharding_no_others(
own_sr, final_own_sr = self._check_process_broker_sharding_others(
ShardRange.SHRINKING, True)
exp_own_sr = dict(own_sr, state=ShardRange.SHRUNK,
meta_timestamp=mock.ANY)
@ -5114,9 +5127,10 @@ class TestSharder(BaseTestSharder):
self.assertFalse(broker.logger.get_lines_for_level('error'))
broker.logger.clear()
def _check_process_broker_sharding_others(self, state):
def _check_process_broker_sharding_stalls_others(self, state):
# verify states in which own_shard_range will cause sharding
# process to start when other shard ranges are in the db
# process to start when other shard ranges are in the db, but stop
# when shard containers have not been created
broker = self._make_broker(hash_='hash%s' % state)
node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2',
'index': 0}
@ -5148,10 +5162,10 @@ class TestSharder(BaseTestSharder):
self.assertFalse(broker.logger.get_lines_for_level('warning'))
self.assertFalse(broker.logger.get_lines_for_level('error'))
def test_process_broker_sharding_with_own_shard_range_and_others(self):
self._check_process_broker_sharding_others(ShardRange.SHARDING)
self._check_process_broker_sharding_others(ShardRange.SHRINKING)
self._check_process_broker_sharding_others(ShardRange.SHARDED)
def test_process_broker_sharding_stalls_with_own_and_other_ranges(self):
self._check_process_broker_sharding_stalls_others(ShardRange.SHARDING)
self._check_process_broker_sharding_stalls_others(ShardRange.SHRINKING)
self._check_process_broker_sharding_stalls_others(ShardRange.SHARDED)
def test_process_broker_leader_auto_shard(self):
# verify conditions for acting as auto-shard leader