swift-manage-shard-ranges repair: check for parent-child overlaps.

Stuck shard ranges have been seen in the production, root cause has been
traced back to that s-m-s-r failed to detect parent-child relationship
in overlaps and it either shrinked child shard ranges into parents or
the other way around. A patch has been added to check minimum age before
s-m-s-r performs repair, which will most likely prevent this from
happening again, but we also need to check for parent-child relationship
in overlaps explicitly during repairs. This patch will do that and
remove parent or child shard ranges from doners, and prevent s-m-s-r
from shrinking them into acceptor shard ranges.

Drive-by 1: fixup gap repair probe test.
The probe test is no longer appropriate because we're no longer
allowed to repair parent-child overlaps, so replace the test with a
manually created gap.

Drive-by 2: address probe test TODOs.
The commented assertion would fail because the node filtering
comparison failed to account for the same node having different indexes
when generated for the root versus the shard. Adding a new iterable
function filter_nodes makes the node filtering behave as expected.

Co-Authored-By: Alistair Coles <alistairncoles@gmail.com>
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>

Change-Id: Iaa89e94a2746ba939fb62449e24bdab9666d7bab
This commit is contained in:
Jianjian Huo 2022-08-24 12:34:04 -07:00
parent c9b898972f
commit a53270a15a
5 changed files with 251 additions and 36 deletions

View File

@ -564,9 +564,25 @@ def compact_shard_ranges(broker, args):
return EXIT_SUCCESS
def _remove_young_overlapping_ranges(acceptor_path, overlapping_donors, args):
# For range shard repair subcommand, check possible parent-children
# relationship between acceptors and donors.
def _remove_illegal_overlapping_donors(
acceptor_path, overlapping_donors, args):
# Check parent-children relationship in overlaps between acceptors and
# donors, remove any overlapping parent or child shard range from donors.
# Note: we can use set() here, since shard range object is hashed by
# id and all shard ranges in overlapping_donors are unique already.
parent_child_donors = set()
for acceptor in acceptor_path:
parent_child_donors.update(
[donor for donor in overlapping_donors
if acceptor.is_child_of(donor) or donor.is_child_of(acceptor)])
if parent_child_donors:
overlapping_donors = ShardRangeList(
[sr for sr in overlapping_donors
if sr not in parent_child_donors])
print('%d donor shards ignored due to parent-child relationship '
'checks' % len(parent_child_donors))
# Check minimum age requirement in overlaps between acceptors and donors.
if args.min_shard_age == 0:
return acceptor_path, overlapping_donors
ts_now = Timestamp.now()
@ -583,18 +599,18 @@ def _remove_young_overlapping_ranges(acceptor_path, overlapping_donors, args):
return acceptor_path, None
# Remove those overlapping donors whose overlapping acceptors were created
# within age limit.
possible_parent_donors = set()
donors_with_young_overlap_acceptor = set()
for acceptor_sr in acceptor_path:
if float(acceptor_sr.timestamp) + args.min_shard_age < float(ts_now):
continue
possible_parent_donors.update([sr for sr in qualified_donors
if acceptor_sr.overlaps(sr)])
if possible_parent_donors:
donors_with_young_overlap_acceptor.update(
[sr for sr in qualified_donors if acceptor_sr.overlaps(sr)])
if donors_with_young_overlap_acceptor:
qualified_donors = ShardRangeList(
[sr for sr in qualified_donors
if sr not in possible_parent_donors])
if sr not in donors_with_young_overlap_acceptor])
print('%d donor shards ignored due to existence of overlapping young '
'acceptors' % len(possible_parent_donors))
'acceptors' % len(donors_with_young_overlap_acceptor))
return acceptor_path, qualified_donors
@ -649,7 +665,7 @@ def _find_overlapping_donors(shard_ranges, own_sr, args):
'Isolated cleaved and/or active shard ranges in donor ranges',
acceptor_path, overlapping_donors)
return _remove_young_overlapping_ranges(
return _remove_illegal_overlapping_donors(
acceptor_path, overlapping_donors, args)

View File

@ -5386,7 +5386,7 @@ class ShardRange(object):
Test if this shard range is a child of another shard range. The
parent-child relationship is inferred from the names of the shard
ranges. This method is limited to work only within the scope of the
same account.
same user-facing account (with and without shard prefix).
:param parent: an instance of ``ShardRange``.
:return: True if ``parent`` is the parent of this shard range, False

View File

@ -18,6 +18,7 @@ from __future__ import print_function
import errno
import gc
import json
import mock
import os
from subprocess import Popen, PIPE
import sys
@ -345,6 +346,27 @@ class Body(object):
next = __next__
def exclude_nodes(nodes, *excludes):
"""
Iterate over ``nodes`` yielding only those not in ``excludes``.
The index key of the node dicts is ignored when matching nodes against the
``excludes`` nodes. Index is not a fundamental property of a node but a
variable annotation added by the Ring depending upon the partition for
which the nodes were generated.
:param nodes: an iterable of node dicts.
:param *excludes: one or more node dicts that should not be yielded.
:return: yields node dicts.
"""
for node in nodes:
match_node = {k: mock.ANY if k == 'index' else v
for k, v in node.items()}
if any(exclude == match_node for exclude in excludes):
continue
yield node
class ProbeTest(unittest.TestCase):
"""
Don't instantiate this directly, use a child class instead.

View File

@ -41,7 +41,7 @@ from test import annotate_failure
from test.probe import PROXY_BASE_URL
from test.probe.brain import BrainSplitter
from test.probe.common import ReplProbeTest, get_server_number, \
wait_for_server_to_hangup, ENABLED_POLICIES
wait_for_server_to_hangup, ENABLED_POLICIES, exclude_nodes
import mock
try:
@ -3427,9 +3427,9 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
self.assert_container_listing(expected_obj_names)
def test_manage_shard_ranges_repair_root_shrinking_gaps(self):
# provoke shrinking/shrunk gaps by prematurely repairing a transient
# overlap in root container; repair the gap.
def test_manage_shard_ranges_repair_parent_child_ranges(self):
# Test repairing a transient parent-child shard range overlap in the
# root container, expect no repairs to be done.
# note: be careful not to add a container listing to this test which
# would get shard ranges into memcache
obj_names = self._make_object_names(4)
@ -3477,19 +3477,17 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
shard_ranges[0].account, shard_ranges[0].container)
self.container_replicators.once(
additional_args='--partitions=%s' % shard_part)
# TODO: get this assertion working (node filtering wonky??)
# for node in [n for n in shard_nodes if n != self.brain.nodes[0]]:
# self.assert_container_state(
# node, 'unsharded', 2, account=shard_ranges[0].account,
# container=shard_ranges[0].container, part=shard_part)
for node in exclude_nodes(shard_nodes, self.brain.nodes[0]):
self.assert_container_state(
node, 'unsharded', 2, account=shard_ranges[0].account,
container=shard_ranges[0].container, part=shard_part)
self.sharders_once(additional_args='--partitions=%s' % shard_part)
# get shards to update state from parent...
self.sharders_once()
# TODO: get this assertion working (node filtering wonky??)
# for node in [n for n in shard_nodes if n != self.brain.nodes[0]]:
# self.assert_container_state(
# node, 'sharded', 2, account=shard_ranges[0].account,
# container=shard_ranges[0].container, part=shard_part)
for node in exclude_nodes(shard_nodes, self.brain.nodes[0]):
self.assert_container_state(
node, 'sharded', 2, account=shard_ranges[0].account,
container=shard_ranges[0].container, part=shard_part)
# put an object into the second of the 2 sub-shards so that the shard
# will update the root next time the sharder is run; do this before
@ -3540,34 +3538,96 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
[(sr.state, sr.deleted, sr.lower, sr.upper)
for sr in root_brokers[0].get_shard_ranges(include_deleted=True)])
# we are allowed to fix the overlap...
# try to fix the overlap and expect no repair has been done.
msg = self.assert_subprocess_success(
['swift-manage-shard-ranges', root_0_db_file, 'repair', '--yes',
'--min-shard-age', '0'])
self.assertIn(
b'Repairs necessary to remove overlapping shard ranges.', msg)
b'1 donor shards ignored due to parent-child relationship checks',
msg)
# verify parent-child checks has prevented repair to be done.
self.assertEqual(
[(ShardRange.ACTIVE, False, ShardRange.MIN, obj_names[1]),
(ShardRange.SHRINKING, False, obj_names[0], obj_names[1]),
# note: overlap!
(ShardRange.ACTIVE, False, obj_names[0], obj_names[1]),
(ShardRange.ACTIVE, False, obj_names[1], ShardRange.MAX)],
[(sr.state, sr.deleted, sr.lower, sr.upper)
for sr in root_brokers[0].get_shard_ranges(include_deleted=True)])
# the transient overlap is 'fixed' in subsequent sharder cycles...
self.sharders_once()
self.sharders_once()
self.container_replicators.once()
# boo :'( ... we made gap
for broker in root_brokers:
self.assertEqual(
[(ShardRange.ACTIVE, False, ShardRange.MIN, obj_names[0]),
(ShardRange.ACTIVE, False, obj_names[0], obj_names[1]),
(ShardRange.SHARDED, True, ShardRange.MIN, obj_names[1]),
(ShardRange.SHRUNK, True, obj_names[0], obj_names[1]),
(ShardRange.ACTIVE, False, obj_names[1], ShardRange.MAX)],
[(sr.state, sr.deleted, sr.lower, sr.upper)
for sr in broker.get_shard_ranges(include_deleted=True)])
def test_manage_shard_ranges_repair_root_gap(self):
# create a gap in root container; repair the gap.
# note: be careful not to add a container listing to this test which
# would get shard ranges into memcache
obj_names = self._make_object_names(8)
self.put_objects(obj_names)
client.post_container(self.url, self.admin_token, self.container_name,
headers={'X-Container-Sharding': 'on'})
# run replicators first time to get sync points set
self.container_replicators.once(
additional_args='--partitions=%s' % self.brain.part)
# shard root
root_0_db_file = self.get_db_file(self.brain.part, self.brain.nodes[0])
self.assert_subprocess_success([
'swift-manage-shard-ranges',
root_0_db_file,
'find_and_replace', '2', '--enable'])
self.container_replicators.once(
additional_args='--partitions=%s' % self.brain.part)
for node in self.brain.nodes:
self.assert_container_state(node, 'unsharded', 4)
self.sharders_once(additional_args='--partitions=%s' % self.brain.part)
# get shards to update state from parent...
self.sharders_once()
for node in self.brain.nodes:
self.assert_container_state(node, 'sharded', 4)
# sanity check, all is well
msg = self.assert_subprocess_success([
'swift-manage-shard-ranges', root_0_db_file, 'repair', '--gaps',
'--dry-run'])
self.assertIn(b'No repairs necessary.', msg)
# deliberately create a gap in root shard ranges (don't ever do this
# for real)
# TODO: replace direct broker modification with s-m-s-r merge
root_brokers = [self.get_broker(self.brain.part, node)
for node in self.brain.nodes]
shard_ranges = root_brokers[0].get_shard_ranges()
self.assertEqual(4, len(shard_ranges))
shard_ranges[2].set_deleted()
root_brokers[0].merge_shard_ranges(shard_ranges)
shard_ranges = root_brokers[0].get_shard_ranges()
self.assertEqual(3, len(shard_ranges))
self.container_replicators.once()
# confirm that we made a gap.
for broker in root_brokers:
self.assertEqual(
[(ShardRange.ACTIVE, False, ShardRange.MIN, obj_names[1]),
(ShardRange.ACTIVE, False, obj_names[1], obj_names[3]),
(ShardRange.ACTIVE, True, obj_names[3], obj_names[5]),
(ShardRange.ACTIVE, False, obj_names[5], ShardRange.MAX)],
[(sr.state, sr.deleted, sr.lower, sr.upper)
for sr in broker.get_shard_ranges(include_deleted=True)])
msg = self.assert_subprocess_success([
'swift-manage-shard-ranges', root_0_db_file, 'repair', '--gaps',
'--yes'])
@ -3580,10 +3640,10 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
# yay! we fixed the gap (without creating an overlap)
for broker in root_brokers:
self.assertEqual(
[(ShardRange.ACTIVE, False, ShardRange.MIN, obj_names[0]),
(ShardRange.SHARDED, True, ShardRange.MIN, obj_names[1]),
(ShardRange.SHRUNK, True, obj_names[0], obj_names[1]),
(ShardRange.ACTIVE, False, obj_names[0], ShardRange.MAX)],
[(ShardRange.ACTIVE, False, ShardRange.MIN, obj_names[1]),
(ShardRange.ACTIVE, False, obj_names[1], obj_names[3]),
(ShardRange.ACTIVE, True, obj_names[3], obj_names[5]),
(ShardRange.ACTIVE, False, obj_names[3], ShardRange.MAX)],
[(sr.state, sr.deleted, sr.lower, sr.upper)
for sr in broker.get_shard_ranges(include_deleted=True)])
@ -3596,8 +3656,14 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
'--dry-run'])
self.assertIn(b'No repairs necessary.', msg)
self.assert_container_listing(
[obj_names[0], new_obj_name] + obj_names[1:])
# put an object into the gap namespace
new_objs = [obj_names[4] + 'a']
self.put_objects(new_objs)
# get root stats up to date
self.sharders_once()
# new object is in listing but old objects in the gap have been lost -
# don't delete shard ranges!
self.assert_container_listing(obj_names[:4] + new_objs + obj_names[6:])
def test_manage_shard_ranges_unsharded_deleted_root(self):
# verify that a deleted DB will still be sharded

View File

@ -2391,6 +2391,117 @@ class TestManageShardRanges(unittest.TestCase):
key=ShardRange.sort_key)
self.assert_shard_ranges_equal(expected, updated_ranges)
def test_repair_parent_overlaps_with_children_donors(self):
# Verify that the overlap repair command ignores expected transient
# overlaps between parent shard acceptor and child donor shards.
root_broker = self._make_broker()
root_broker.set_sharding_sysmeta('Quoted-Root', 'a/c')
self.assertTrue(root_broker.is_root_container())
# The parent shard range would have been set to state SHARDING in the
# shard container but is still showing as ACTIVE in the root container.
# (note: it is valid for a single shard to span entire namespace)
ts_parent = next(self.ts_iter)
parent_shard = ShardRange(
ShardRange.make_path('.shards_a', 'c', 'c', ts_parent, 0),
ts_parent, lower='', upper='', object_count=10,
state=ShardRange.ACTIVE)
# Children shards have reported themselves to root as CLEAVING/
# CREATED.
ts_child = next(self.ts_iter)
child_shards = [
ShardRange(
ShardRange.make_path(
'.shards_a', 'c', parent_shard.container, ts_child, 0),
ts_child, lower='', upper='p', object_count=1,
state=ShardRange.CLEAVED),
ShardRange(
ShardRange.make_path(
'.shards_a', 'c', parent_shard.container, ts_child, 1),
ts_child, lower='p', upper='', object_count=1,
state=ShardRange.CLEAVED)]
root_broker.merge_shard_ranges([parent_shard] + child_shards)
out = StringIO()
err = StringIO()
ts_now = next(self.ts_iter)
with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err), \
mock_timestamp_now(ts_now):
ret = main([root_broker.db_file, 'repair',
'--yes', '--min-shard-age', '0'])
err_lines = err.getvalue().split('\n')
out_lines = out.getvalue().split('\n')
self.assertEqual(0, ret, err_lines + out_lines)
self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
self.assertNotIn(
'Repairs necessary to remove overlapping shard ranges.',
out_lines)
self.assertEqual(
['2 donor shards ignored due to parent-child relationship'
' checks'], out_lines[:1])
updated_ranges = root_broker.get_shard_ranges()
# Expect no change to shard ranges.
expected = sorted([parent_shard] + child_shards,
key=ShardRange.sort_key)
self.assert_shard_ranges_equal(expected, updated_ranges)
def test_repair_children_overlaps_with_parent_donor(self):
# Verify that the overlap repair command ignores expected transient
# overlaps between child shard acceptors and parent donor shards.
root_broker = self._make_broker()
root_broker.set_sharding_sysmeta('Quoted-Root', 'a/c')
self.assertTrue(root_broker.is_root_container())
# The parent shard range would have been set to state SHARDING in the
# shard container but is still showing as ACTIVE in the root container.
# (note: it is valid for a single shard to span entire namespace)
ts_parent = next(self.ts_iter)
parent_shard = ShardRange(
ShardRange.make_path('.shards_a', 'c', 'c', ts_parent, 0),
ts_parent, lower='', upper='', object_count=5,
state=ShardRange.ACTIVE)
# Children shards have reported themselves to root as CLEAVING/CREATED,
# but they will end up with becoming acceptor shards due to having more
# objects than the above parent shard.
ts_child = next(self.ts_iter)
child_shards = [
ShardRange(
ShardRange.make_path(
'.shards_a', 'c', parent_shard.container, ts_child, 0),
ts_child, lower='', upper='p', object_count=5,
state=ShardRange.CLEAVED),
ShardRange(
ShardRange.make_path(
'.shards_a', 'c', parent_shard.container, ts_child, 1),
ts_child, lower='p', upper='', object_count=5,
state=ShardRange.CLEAVED)]
root_broker.merge_shard_ranges([parent_shard] + child_shards)
out = StringIO()
err = StringIO()
ts_now = next(self.ts_iter)
with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err), \
mock_timestamp_now(ts_now):
ret = main([root_broker.db_file, 'repair',
'--yes', '--min-shard-age', '0'])
err_lines = err.getvalue().split('\n')
out_lines = out.getvalue().split('\n')
self.assertEqual(0, ret, err_lines + out_lines)
self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
self.assertNotIn(
'Repairs necessary to remove overlapping shard ranges.',
out_lines)
self.assertEqual(
['1 donor shards ignored due to parent-child relationship'
' checks'], out_lines[:1])
updated_ranges = root_broker.get_shard_ranges()
# Expect no change to shard ranges.
expected = sorted([parent_shard] + child_shards,
key=ShardRange.sort_key)
self.assert_shard_ranges_equal(expected, updated_ranges)
@with_tempdir
def test_show_and_analyze(self, tempdir):
broker = self._make_broker()