From 230b467af09af2d4df42915d1247a1304ac61b2a Mon Sep 17 00:00:00 2001 From: Jianjian Huo Date: Tue, 8 Nov 2022 23:13:18 -0800 Subject: [PATCH] sharding: avoid transient overlaps while auditing. During normal sharding, parent shard and child shard ranges may have transient overlaps which will disapear when done. To avoid this transient overlap shows up in the warning log, when sharder audits root container, it will not emit 'overlapping ranges in state ...' warning when overlapping shard ranges are parent-children during the past time period of reclaim_age. Change-Id: I88bce3c178923457e3887790fa58aad56086b1e6 --- swift/container/sharder.py | 57 ++++++++++- test/unit/container/test_sharder.py | 144 +++++++++++++++++++++++++++- 2 files changed, 195 insertions(+), 6 deletions(-) diff --git a/swift/container/sharder.py b/swift/container/sharder.py index ab25a19ca5..847bc1fb4a 100644 --- a/swift/container/sharder.py +++ b/swift/container/sharder.py @@ -158,19 +158,55 @@ def find_paths_with_gaps(shard_ranges, within_range=None): return paths_with_gaps -def find_overlapping_ranges(shard_ranges): +def _is_parent_or_child(shard_range, other, time_period): + """ + Test if shard range ``shard_range`` is the parent or a child of another + shard range ``other`` within past time period ``time_period``. This method + is limited to work only within the scope of the same user-facing account + (with and without shard prefix). + + :param shard_range: an instance of ``ShardRange``. + :param other: an instance of ``ShardRange``. + :param time_period: the specified past time period in seconds. Value of + 0 means all time in the past. + :return: True if ``shard_range`` is the parent or a child of ``other`` + within past time period, False otherwise, assuming that they are within + the same account. + """ + exclude_age = (time.time() - float(time_period)) if time_period > 0 else 0 + if shard_range.is_child_of(other) and shard_range.timestamp >= exclude_age: + return True + if other.is_child_of(shard_range) and other.timestamp >= exclude_age: + return True + return False + + +def find_overlapping_ranges( + shard_ranges, exclude_parent_child=False, time_period=0): """ Find all pairs of overlapping ranges in the given list. :param shard_ranges: A list of :class:`~swift.utils.ShardRange` + :param exclude_parent_child: If True then overlapping pairs that have a + parent-child relationship within the past time period + ``time_period`` are excluded from the returned set. Default is + False. + :param time_period: the specified past time period in seconds. Value of + 0 means all time in the past. :return: a set of tuples, each tuple containing ranges that overlap with each other. """ result = set() for i, shard_range in enumerate(shard_ranges): - overlapping = [ - sr for sr in shard_ranges[i + 1:] - if shard_range.name != sr.name and shard_range.overlaps(sr)] + if exclude_parent_child: + overlapping = [ + sr for sr in shard_ranges[i + 1:] + if shard_range.name != sr.name and shard_range.overlaps(sr) and + not _is_parent_or_child(shard_range, sr, time_period)] + else: + overlapping = [ + sr for sr in shard_ranges[i + 1:] + if shard_range.name != sr.name and shard_range.overlaps(sr)] if overlapping: overlapping.append(shard_range) overlapping.sort(key=ShardRange.sort_key) @@ -1199,7 +1235,18 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator): # allow multiple shards in that state continue shard_ranges = broker.get_shard_ranges(states=state) - overlaps = find_overlapping_ranges(shard_ranges) + # Transient overlaps can occur during the period immediately after + # sharding if a root learns about new child shards before it learns + # that the parent has sharded. These overlaps are normally + # corrected as an up-to-date version of the parent shard range is + # replicated to the root. Parent-child overlaps are therefore + # ignored for a reclaim age after the child was created. After + # that, parent-child overlaps may indicate that there is + # permanently stale parent shard range data, perhaps from a node + # that has been offline, so these are reported. + overlaps = find_overlapping_ranges( + shard_ranges, exclude_parent_child=True, + time_period=self.reclaim_age) if overlaps: self._increment_stat('audit_root', 'has_overlap') self._update_stat('audit_root', 'num_overlap', diff --git a/test/unit/container/test_sharder.py b/test/unit/container/test_sharder.py index cd97f00cd8..a497b1a427 100644 --- a/test/unit/container/test_sharder.py +++ b/test/unit/container/test_sharder.py @@ -43,7 +43,7 @@ from swift.container.sharder import ContainerSharder, sharding_enabled, \ find_shrinking_candidates, process_compactible_shard_sequences, \ find_compactible_shard_sequences, is_shrinking_candidate, \ is_sharding_candidate, find_paths, rank_paths, ContainerSharderConf, \ - find_paths_with_gaps, combine_shard_ranges + find_paths_with_gaps, combine_shard_ranges, find_overlapping_ranges from swift.common.utils import ShardRange, Timestamp, hash_path, \ encode_timestamps, parse_db_filename, quorum_size, Everything, md5, \ ShardName @@ -5562,6 +5562,63 @@ class TestSharder(BaseTestSharder): broker.merge_shard_ranges(shrinking_shard_ranges) check_missing() + def test_audit_root_container_with_parent_child_overlapping(self): + # Test '_audit_root_container' when overlapping shard ranges are + # parent and children, expect no warnings. The case of non parent-child + # overlapping is tested in 'test_audit_root_container'. + now_ts = next(self.ts_iter) + past_ts = Timestamp(float(now_ts) - 604801) + root_sr = ShardRange('a/c', past_ts, state=ShardRange.SHARDED) + parent_range = ShardRange(ShardRange.make_path( + '.shards_a', 'c', root_sr.container, + past_ts, 0), + past_ts, 'a', 'f', object_count=1, + state=ShardRange.CLEAVED) + child_ranges = [ + ShardRange( + ShardRange.make_path( + '.shards_a', 'c', parent_range.container, past_ts, 0), + past_ts, lower='a', upper='c', object_count=1, + state=ShardRange.CLEAVED), + ShardRange( + ShardRange.make_path( + '.shards_a', 'c', parent_range.container, past_ts, 1), + past_ts, lower='c', upper='f', object_count=1, + state=ShardRange.CLEAVED)] + self.assertTrue(find_overlapping_ranges([parent_range] + child_ranges)) + broker = self._make_broker() + + # The case of transient overlapping within reclaim_age. + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'has_overlap': 0, 'num_overlap': 0} + broker.merge_shard_ranges([parent_range] + child_ranges) + with mock.patch('swift.container.sharder.time.time', + return_value=float(now_ts) - 10): + with self._mock_sharder() as sharder: + with mock.patch.object( + sharder, '_audit_shard_container') as mocked: + sharder._audit_container(broker) + self._assert_stats(expected_stats, sharder, 'audit_root') + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + mocked.assert_not_called() + + # The case of overlapping past reclaim_age. + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'has_overlap': 1, 'num_overlap': 2} + with mock.patch('swift.container.sharder.time.time', + return_value=float(now_ts)): + with self._mock_sharder() as sharder: + with mock.patch.object( + sharder, '_audit_shard_container') as mocked: + sharder._audit_container(broker) + lines = sharder.logger.get_lines_for_level('warning') + self.assertIn('Audit failed for root', lines[0]) + self.assertFalse(lines[1:]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self._assert_stats(expected_stats, sharder, 'audit_root') + mocked.assert_not_called() + def test_audit_deleted_root_container(self): broker = self._make_broker() shard_bounds = ( @@ -8708,6 +8765,91 @@ class TestSharderFunctions(BaseTestSharder): paths_with_gaps = find_paths_with_gaps(ranges, range_of_interest) self.assertFalse(paths_with_gaps) + def test_find_overlapping_ranges(self): + now_ts = next(self.ts_iter) + past_ts = Timestamp(float(now_ts) - 61) + root_sr = ShardRange('a/c', past_ts, state=ShardRange.SHARDED) + bounds = ( + ('', 'a'), + ('a', 'f'), # the 'parent_range' in this test. + ('f', 'm'), # shard range overlaps with the next. + ('k', 'p'), + ('p', 'y'), + ('y', '') + ) + ranges = [ + ShardRange( + ShardRange.make_path( + '.shards_a', 'c', root_sr.container, past_ts, + index), + past_ts, lower, upper, object_count=1, + state=ShardRange.SHARDED) + for index, (lower, upper) in enumerate(bounds)] + parent_range = ranges[1] + child_ranges = [ + ShardRange( + ShardRange.make_path( + '.shards_a', 'c', parent_range.container, past_ts, 0), + past_ts, lower='a', upper='c', object_count=1, + state=ShardRange.CLEAVED), + ShardRange( + ShardRange.make_path( + '.shards_a', 'c', parent_range.container, past_ts, 1), + past_ts, lower='c', upper='f', object_count=1, + state=ShardRange.CLEAVED)] + overlapping_ranges = find_overlapping_ranges(ranges) + self.assertEqual({(ranges[2], ranges[3])}, overlapping_ranges) + overlapping_ranges = find_overlapping_ranges( + [ranges[1]] + child_ranges) + self.assertEqual( + {(child_ranges[0], child_ranges[1], ranges[1])}, + overlapping_ranges) + overlapping_ranges = find_overlapping_ranges( + [ranges[1]] + child_ranges, exclude_parent_child=True) + self.assertEqual(0, len(overlapping_ranges)) + with mock.patch( + 'swift.container.sharder.time.time', + return_value=float(now_ts)): + overlapping_ranges = find_overlapping_ranges( + [ranges[1]] + child_ranges, exclude_parent_child=True, + time_period=61) + self.assertEqual(0, len(overlapping_ranges)) + overlapping_ranges = find_overlapping_ranges( + [ranges[1]] + child_ranges, exclude_parent_child=True, + time_period=60) + self.assertEqual( + {(child_ranges[0], child_ranges[1], ranges[1])}, + overlapping_ranges) + overlapping_ranges = find_overlapping_ranges( + ranges + child_ranges) + self.assertEqual( + {(child_ranges[0], + child_ranges[1], + ranges[1]), + (ranges[2], + ranges[3])}, + overlapping_ranges) + overlapping_ranges = find_overlapping_ranges( + ranges + child_ranges, exclude_parent_child=True) + self.assertEqual({(ranges[2], ranges[3])}, overlapping_ranges) + with mock.patch( + 'swift.container.sharder.time.time', + return_value=float(now_ts)): + overlapping_ranges = find_overlapping_ranges( + ranges + child_ranges, exclude_parent_child=True, + time_period=61) + self.assertEqual({(ranges[2], ranges[3])}, overlapping_ranges) + overlapping_ranges = find_overlapping_ranges( + ranges + child_ranges, exclude_parent_child=True, + time_period=60) + self.assertEqual( + {(child_ranges[0], + child_ranges[1], + ranges[1]), + (ranges[2], + ranges[3])}, + overlapping_ranges) + class TestContainerSharderConf(unittest.TestCase): def test_default(self):