diff --git a/swift/cli/manage_shard_ranges.py b/swift/cli/manage_shard_ranges.py index 69e9f5c970..9bfd89e543 100644 --- a/swift/cli/manage_shard_ranges.py +++ b/swift/cli/manage_shard_ranges.py @@ -167,7 +167,7 @@ from six.moves import input from swift.common.utils import Timestamp, get_logger, ShardRange, readconf, \ - ShardRangeList + ShardRangeList, non_negative_int, config_positive_int_value from swift.container.backend import ContainerBroker, UNSHARDED from swift.container.sharder import make_shard_ranges, sharding_enabled, \ CleavingContext, process_compactible_shard_sequences, \ @@ -180,6 +180,8 @@ EXIT_ERROR = 1 EXIT_INVALID_ARGS = 2 # consistent with argparse exit code for invalid args EXIT_USER_QUIT = 3 +MIN_SHARD_RANGE_AGE_FOR_REPAIR = 4 * 3600 + # Some CLI options derive their default values from DEFAULT_SHARDER_CONF if # they have not been set. It is therefore important that the CLI parser # provides None as a default so that we can detect that no value was set on the @@ -206,6 +208,25 @@ class InvalidSolutionException(ManageShardRangesException): self.overlapping_donors = overlapping_donors +def wrap_for_argparse(func, msg=None): + """ + Wrap the given ``func`` to catch any ``ValueError`` and raise an + ``argparse.ArgumentTypeError`` instead. + + :param func: a function. + :param msg: an optional message to use with any exception that is used; if + not given then the string representation of the ValueError will be + used. + :return: a function wrapper. + """ + def wrapped_func(*args, **kwargs): + try: + return func(*args, **kwargs) + except ValueError as err: + raise argparse.ArgumentTypeError(str(err) if msg is None else msg) + return wrapped_func + + def _proceed(args): if args.dry_run: choice = 'no' @@ -543,6 +564,41 @@ def compact_shard_ranges(broker, args): return EXIT_SUCCESS +def _remove_young_overlapping_ranges(acceptor_path, overlapping_donors, args): + # For range shard repair subcommand, check possible parent-children + # relationship between acceptors and donors. + if args.min_shard_age == 0: + return acceptor_path, overlapping_donors + ts_now = Timestamp.now() + # Remove overlapping donor shard ranges who were created recently within + # 'min_shard_age' age limit. + qualified_donors = ShardRangeList( + [sr for sr in overlapping_donors + if float(sr.timestamp) + args.min_shard_age < float(ts_now)]) + young_donors = len(overlapping_donors) - len(qualified_donors) + if young_donors > 0: + print('%d overlapping donor shards ignored due to minimum age ' + 'limit' % young_donors) + if not qualified_donors: + return acceptor_path, None + # Remove those overlapping donors whose overlapping acceptors were created + # within age limit. + possible_parent_donors = set() + for acceptor_sr in acceptor_path: + if float(acceptor_sr.timestamp) + args.min_shard_age < float(ts_now): + continue + possible_parent_donors.update([sr for sr in qualified_donors + if acceptor_sr.overlaps(sr)]) + if possible_parent_donors: + qualified_donors = ShardRangeList( + [sr for sr in qualified_donors + if sr not in possible_parent_donors]) + print('%d donor shards ignored due to existence of overlapping young ' + 'acceptors' % len(possible_parent_donors)) + + return acceptor_path, qualified_donors + + def _find_overlapping_donors(shard_ranges, own_sr, args): shard_ranges = ShardRangeList(shard_ranges) if ShardRange.SHARDING in shard_ranges.states: @@ -593,7 +649,8 @@ def _find_overlapping_donors(shard_ranges, own_sr, args): 'Isolated cleaved and/or active shard ranges in donor ranges', acceptor_path, overlapping_donors) - return acceptor_path, overlapping_donors + return _remove_young_overlapping_ranges( + acceptor_path, overlapping_donors, args) def _fix_gaps(broker, args, paths_with_gaps): @@ -775,13 +832,6 @@ def analyze_shard_ranges(args): return EXIT_SUCCESS -def _positive_int(arg): - val = int(arg) - if val <= 0: - raise argparse.ArgumentTypeError('must be > 0') - return val - - def _add_find_args(parser): parser.add_argument( 'rows_per_shard', nargs='?', type=int, default=USE_SHARDER_DEFAULT, @@ -790,7 +840,8 @@ def _add_find_args(parser): 'given in a conf file specified with --config, otherwise %s.' % DEFAULT_SHARDER_CONF['rows_per_shard']) parser.add_argument( - '--minimum-shard-size', type=_positive_int, + '--minimum-shard-size', + type=wrap_for_argparse(config_positive_int_value, 'must be > 0'), default=USE_SHARDER_DEFAULT, help='Minimum size of the final shard range. If this is greater than ' 'one then the final shard range may be extended to more than ' @@ -839,11 +890,12 @@ def _add_prompt_args(parser): def _add_max_expanding_arg(parser): - parser.add_argument('--max-expanding', nargs='?', - type=_positive_int, - default=USE_SHARDER_DEFAULT, - help='Maximum number of shards that should be ' - 'expanded. Defaults to unlimited.') + parser.add_argument( + '--max-expanding', nargs='?', + type=wrap_for_argparse(config_positive_int_value, 'must be > 0'), + default=USE_SHARDER_DEFAULT, + help='Maximum number of shards that should be ' + 'expanded. Defaults to unlimited.') def _make_parser(): @@ -940,32 +992,35 @@ def _make_parser(): 'of rows. This command only works on root containers.') _add_prompt_args(compact_parser) compact_parser.add_argument( - '--shrink-threshold', nargs='?', type=_positive_int, + '--shrink-threshold', nargs='?', + type=wrap_for_argparse(config_positive_int_value, 'must be > 0'), default=USE_SHARDER_DEFAULT, help='The number of rows below which a shard can qualify for ' - 'shrinking. ' - 'Defaults to %d' % DEFAULT_SHARDER_CONF['shrink_threshold']) + 'shrinking. ' + 'Defaults to %d' % DEFAULT_SHARDER_CONF['shrink_threshold']) compact_parser.add_argument( - '--expansion-limit', nargs='?', type=_positive_int, + '--expansion-limit', nargs='?', + type=wrap_for_argparse(config_positive_int_value, 'must be > 0'), default=USE_SHARDER_DEFAULT, help='Maximum number of rows for an expanding shard to have after ' - 'compaction has completed. ' - 'Defaults to %d' % DEFAULT_SHARDER_CONF['expansion_limit']) + 'compaction has completed. ' + 'Defaults to %d' % DEFAULT_SHARDER_CONF['expansion_limit']) # If just one donor shard is chosen to shrink to an acceptor then the # expanded acceptor will handle object listings as soon as the donor shard # has shrunk. If more than one donor shard are chosen to shrink to an # acceptor then the acceptor may not handle object listings for some donor # shards that have shrunk until *all* donors have shrunk, resulting in # temporary gap(s) in object listings where the shrunk donors are missing. - compact_parser.add_argument('--max-shrinking', nargs='?', - type=_positive_int, - default=USE_SHARDER_DEFAULT, - help='Maximum number of shards that should be ' - 'shrunk into each expanding shard. ' - 'Defaults to 1. Using values greater ' - 'than 1 may result in temporary gaps in ' - 'object listings until all selected ' - 'shards have shrunk.') + compact_parser.add_argument( + '--max-shrinking', nargs='?', + type=wrap_for_argparse(config_positive_int_value, 'must be > 0'), + default=USE_SHARDER_DEFAULT, + help='Maximum number of shards that should be ' + 'shrunk into each expanding shard. ' + 'Defaults to 1. Using values greater ' + 'than 1 may result in temporary gaps in ' + 'object listings until all selected ' + 'shards have shrunk.') _add_max_expanding_arg(compact_parser) compact_parser.set_defaults(func=compact_shard_ranges) @@ -975,6 +1030,14 @@ def _make_parser(): help='Repair overlapping shard ranges. No action will be taken ' 'without user confirmation unless the -y option is used.') _add_prompt_args(repair_parser) + repair_parser.add_argument( + '--min-shard-age', nargs='?', + type=wrap_for_argparse(non_negative_int, 'must be >= 0'), + default=MIN_SHARD_RANGE_AGE_FOR_REPAIR, + help='Minimum age of a shard for it to be considered as an overlap ' + 'that is due for repair. Overlapping shards younger than this ' + 'age will be ignored. Value of 0 means no recent shards will be ' + 'ignored. Defaults to %d.' % MIN_SHARD_RANGE_AGE_FOR_REPAIR) # TODO: maybe this should be a separate subcommand given that it needs # some extra options vs repairing overlaps? repair_parser.add_argument( @@ -988,6 +1051,14 @@ def _make_parser(): 'analyze', help='Analyze shard range json data read from file. Use -v to see ' 'more detailed analysis.') + analyze_parser.add_argument( + '--min-shard-age', nargs='?', + type=wrap_for_argparse(non_negative_int, 'must be >= 0'), + default=0, + help='Minimum age of a shard for it to be considered as an overlap ' + 'that is due for repair. Overlapping shards younger than this ' + 'age will be ignored. Value of 0 means no recent shards will be ' + 'ignored. Defaults to 0.') analyze_parser.set_defaults(func=analyze_shard_ranges) return parser diff --git a/test/probe/test_sharder.py b/test/probe/test_sharder.py index 46e83556f5..68cace6ff5 100644 --- a/test/probe/test_sharder.py +++ b/test/probe/test_sharder.py @@ -3142,7 +3142,8 @@ class TestManagedContainerSharding(BaseTestContainerSharding): # path with most cleaving progress, and so shrink shard ranges 0.*. db_file = self.get_db_file(self.brain.part, self.brain.nodes[0]) self.assert_subprocess_success( - ['swift-manage-shard-ranges', db_file, 'repair', '--yes']) + ['swift-manage-shard-ranges', db_file, 'repair', '--yes', + '--min-shard-age', '0']) # make sure all root replicas now sync their shard ranges self.replicators.once() @@ -3397,7 +3398,8 @@ class TestManagedContainerSharding(BaseTestContainerSharding): # container db_file = self.get_db_file(self.brain.part, self.brain.nodes[0]) self.assert_subprocess_success( - ['swift-manage-shard-ranges', db_file, 'repair', '--yes']) + ['swift-manage-shard-ranges', db_file, 'repair', '--yes', + '--min-shard-age', '0']) self.replicators.once() self.sharders_once() self.sharders_once() @@ -3539,8 +3541,9 @@ class TestManagedContainerSharding(BaseTestContainerSharding): for sr in root_brokers[0].get_shard_ranges(include_deleted=True)]) # we are allowed to fix the overlap... - msg = self.assert_subprocess_success([ - 'swift-manage-shard-ranges', root_0_db_file, 'repair', '--yes']) + msg = self.assert_subprocess_success( + ['swift-manage-shard-ranges', root_0_db_file, 'repair', '--yes', + '--min-shard-age', '0']) self.assertIn( b'Repairs necessary to remove overlapping shard ranges.', msg) @@ -3586,7 +3589,7 @@ class TestManagedContainerSharding(BaseTestContainerSharding): msg = self.assert_subprocess_success([ 'swift-manage-shard-ranges', root_0_db_file, 'repair', - '--dry-run']) + '--dry-run', '--min-shard-age', '0']) self.assertIn(b'No repairs necessary.', msg) msg = self.assert_subprocess_success([ 'swift-manage-shard-ranges', root_0_db_file, 'repair', '--gaps', diff --git a/test/unit/cli/test_manage_shard_ranges.py b/test/unit/cli/test_manage_shard_ranges.py index bb35c6c8d3..469b8da59d 100644 --- a/test/unit/cli/test_manage_shard_ranges.py +++ b/test/unit/cli/test_manage_shard_ranges.py @@ -2137,7 +2137,9 @@ class TestManageShardRanges(unittest.TestCase): mock_timestamp_now(ts_now), \ mock.patch('swift.cli.manage_shard_ranges.input', return_value=user_input): - ret = main([broker.db_file, 'repair'] + options) + ret = main( + [broker.db_file, 'repair', '--min-shard-age', '0'] + + options) self.assertEqual(exit_code, ret) err_lines = err.getvalue().split('\n') self.assert_starts_with(err_lines[0], 'Loaded db broker for ') @@ -2185,6 +2187,172 @@ class TestManageShardRanges(unittest.TestCase): key=ShardRange.sort_key) self.assert_shard_ranges_equal(expected, updated_ranges) + def test_repair_younger_overlapping_donor_shards(self): + # test shard range repair on the normal acceptor ranges and young + # overlapping shard ranges which are younger than '--min-shard-age', + # expect them not to be repaired. + broker = self._make_broker() + broker.set_sharding_sysmeta('Quoted-Root', 'a/c') + ts_now = next(self.ts_iter) + with mock_timestamp_now(Timestamp(float(ts_now) - 61)): + acceptor_ranges = make_shard_ranges( + broker, self.shard_data, '.shards_') + with mock_timestamp_now(ts_now): + overlap_donor_ranges = make_shard_ranges( + broker, self.overlap_shard_data_2, '.shards_') + broker.merge_shard_ranges(acceptor_ranges + overlap_donor_ranges) + self.assertTrue(broker.is_root_container()) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + ret = main( + [broker.db_file, 'repair', '--min-shard-age', '60', '-y']) + self.assertEqual(0, ret) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + out_lines = out.getvalue().split('\n') + self.assertEqual( + ['2 overlapping donor shards ignored due to minimum age limit'], + out_lines[:1]) + updated_ranges = broker.get_shard_ranges() + expected = sorted( + acceptor_ranges + overlap_donor_ranges, + key=ShardRange.sort_key) + self.assert_shard_ranges_equal(expected, updated_ranges) + + def test_repair_younger_acceptor_with_overlapping_donor_shards(self): + # test shard range repair on the overlapping normal donor ranges and + # young acceptor shard ranges who are younger than '--min-shard-age', + # expect no overlapping ranges to be repaired. + broker = self._make_broker() + broker.set_sharding_sysmeta('Quoted-Root', 'a/c') + ts_now = next(self.ts_iter) + with mock_timestamp_now(Timestamp(float(ts_now) + 3601)): + acceptor_ranges = make_shard_ranges( + broker, self.shard_data, '.shards_') + with mock_timestamp_now(ts_now): + overlap_donor_ranges = make_shard_ranges( + broker, self.overlap_shard_data_2, '.shards_') + broker.merge_shard_ranges(acceptor_ranges + overlap_donor_ranges) + self.assertTrue(broker.is_root_container()) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), \ + mock.patch('sys.stderr', err), \ + mock_timestamp_now(Timestamp(float(ts_now) + 3601 + 59)): + ret = main( + [broker.db_file, 'repair', '--min-shard-age', '60', '-y']) + self.assertEqual(0, ret) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + out_lines = out.getvalue().split('\n') + self.assertEqual( + ['2 donor shards ignored due to existence of overlapping young' + ' acceptors'], out_lines[:1]) + updated_ranges = broker.get_shard_ranges() + expected = sorted( + acceptor_ranges + overlap_donor_ranges, + key=ShardRange.sort_key) + self.assert_shard_ranges_equal(expected, updated_ranges) + + def test_repair_older_overlapping_donor_and_acceptor_shards(self): + # test shard range repair on the overlapping donor and acceptor shard + # ranges which all are older than '--min-shard-age', expect them to be + # repaired. + broker = self._make_broker() + broker.set_sharding_sysmeta('Quoted-Root', 'a/c') + ts_now = next(self.ts_iter) + with mock_timestamp_now(ts_now): + acceptor_ranges = make_shard_ranges( + broker, self.shard_data, '.shards_') + with mock_timestamp_now(Timestamp(float(ts_now) + 1800)): + overlap_donor_ranges = make_shard_ranges( + broker, self.overlap_shard_data_2, '.shards_') + broker.merge_shard_ranges(acceptor_ranges + overlap_donor_ranges) + self.assertTrue(broker.is_root_container()) + out = StringIO() + err = StringIO() + ts_1hr_after = Timestamp(float(ts_now) + 3601) + with mock.patch('sys.stdout', out), \ + mock.patch('sys.stderr', err), \ + mock_timestamp_now(ts_1hr_after): + ret = main( + [broker.db_file, 'repair', '--min-shard-age', '60', '-y']) + self.assertEqual(0, ret) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + out_lines = out.getvalue().split('\n') + self.assertEqual( + ['Repairs necessary to remove overlapping shard ranges.'], + out_lines[:1]) + updated_ranges = broker.get_shard_ranges() + for sr in overlap_donor_ranges: + sr.update_state(ShardRange.SHRINKING, ts_1hr_after) + sr.epoch = ts_1hr_after + expected = sorted( + acceptor_ranges + overlap_donor_ranges, + key=ShardRange.sort_key) + self.assert_shard_ranges_equal(expected, updated_ranges) + + def test_repair_overlapping_donor_and_acceptor_shards_default(self): + # test shard range repair on the overlapping donor and acceptor shard + # ranges wth default '--min-shard-age' value. + broker = self._make_broker() + broker.set_sharding_sysmeta('Quoted-Root', 'a/c') + ts_now = next(self.ts_iter) + with mock_timestamp_now(ts_now): + acceptor_ranges = make_shard_ranges( + broker, self.shard_data, '.shards_') + with mock_timestamp_now(Timestamp(int(ts_now) + 1)): + overlap_donor_ranges = make_shard_ranges( + broker, self.overlap_shard_data_2, '.shards_') + broker.merge_shard_ranges(acceptor_ranges + overlap_donor_ranges) + self.assertTrue(broker.is_root_container()) + out = StringIO() + err = StringIO() + ts_repair = Timestamp(int(ts_now) + 4 * 3600 - 1) + with mock.patch('sys.stdout', out), \ + mock.patch('sys.stderr', err), \ + mock_timestamp_now(ts_repair): + # default min-shard-age prevents repair... + ret = main([broker.db_file, 'repair', '-y']) + self.assertEqual(0, ret) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + out_lines = out.getvalue().split('\n') + self.assertEqual( + ['2 overlapping donor shards ignored due to minimum age limit'], + out_lines[:1]) + updated_ranges = broker.get_shard_ranges() + expected = sorted( + acceptor_ranges + overlap_donor_ranges, + key=ShardRange.sort_key) + self.assert_shard_ranges_equal(expected, updated_ranges) + + out = StringIO() + err = StringIO() + ts_repair = Timestamp(int(ts_now) + 4 * 3600 + 2) + with mock.patch('sys.stdout', out), \ + mock.patch('sys.stderr', err), \ + mock_timestamp_now(ts_repair): + # default min-shard-age allows repair now... + ret = main([broker.db_file, 'repair', '-y']) + self.assertEqual(0, ret) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + out_lines = out.getvalue().split('\n') + self.assertEqual( + ['Repairs necessary to remove overlapping shard ranges.'], + out_lines[:1]) + updated_ranges = broker.get_shard_ranges() + for sr in overlap_donor_ranges: + sr.update_state(ShardRange.SHRINKING, ts_repair) + sr.epoch = ts_repair + expected = sorted( + acceptor_ranges + overlap_donor_ranges, + key=ShardRange.sort_key) + self.assert_shard_ranges_equal(expected, updated_ranges) + def test_repair_two_complete_sequences_one_incomplete(self): broker = self._make_broker() broker.set_sharding_sysmeta('Quoted-Root', 'a/c') @@ -2205,7 +2373,8 @@ class TestManageShardRanges(unittest.TestCase): ts_now = next(self.ts_iter) with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err), \ mock_timestamp_now(ts_now): - ret = main([broker.db_file, 'repair', '--yes']) + ret = main([broker.db_file, 'repair', '--yes', + '--min-shard-age', '0']) self.assertEqual(0, ret) err_lines = err.getvalue().split('\n') self.assert_starts_with(err_lines[0], 'Loaded db broker for ')