ShardRange: add is_child_of method
Adds an is_child_of method that infers the parent-child relationship of two shard ranges from their names. This new method is limited to use only under the same account. Co-Authored-By: Jianjian Huo <jhuo@nvidia.com> Change-Id: Iac3a8ec5d8947989b64aa27f40caa3d8d1423a7c
This commit is contained in:
@@ -5140,6 +5140,111 @@ class ShardRangeOuterBound(object):
|
|||||||
__nonzero__ = __bool__
|
__nonzero__ = __bool__
|
||||||
|
|
||||||
|
|
||||||
|
class ShardName(object):
|
||||||
|
"""
|
||||||
|
Encapsulates the components of a shard name.
|
||||||
|
|
||||||
|
Instances of this class would typically be constructed via the create() or
|
||||||
|
parse() class methods.
|
||||||
|
|
||||||
|
Shard names have the form:
|
||||||
|
|
||||||
|
<account>/<root_container>-<parent_container_hash>-<timestamp>-<index>
|
||||||
|
|
||||||
|
Note: not all instances of :class:`~swift.common.utils.ShardRange` have
|
||||||
|
names that will parse as a :class:`~swift.common.utils.ShardName`; root
|
||||||
|
container own shard ranges, for example, have a simpler name format of
|
||||||
|
<account>/<root_container>.
|
||||||
|
"""
|
||||||
|
def __init__(self, account, root_container,
|
||||||
|
parent_container_hash,
|
||||||
|
timestamp,
|
||||||
|
index):
|
||||||
|
self.account = self._validate(account)
|
||||||
|
self.root_container = self._validate(root_container)
|
||||||
|
self.parent_container_hash = self._validate(parent_container_hash)
|
||||||
|
self.timestamp = Timestamp(timestamp)
|
||||||
|
self.index = int(index)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _validate(cls, arg):
|
||||||
|
if arg is None:
|
||||||
|
raise ValueError('arg must not be None')
|
||||||
|
return arg
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return '%s/%s-%s-%s-%s' % (self.account,
|
||||||
|
self.root_container,
|
||||||
|
self.parent_container_hash,
|
||||||
|
self.timestamp.internal,
|
||||||
|
self.index)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def hash_container_name(cls, container_name):
|
||||||
|
"""
|
||||||
|
Calculates the hash of a container name.
|
||||||
|
|
||||||
|
:param container_name: name to be hashed.
|
||||||
|
:return: the hexdigest of the md5 hash of ``container_name``.
|
||||||
|
:raises ValueError: if ``container_name`` is None.
|
||||||
|
"""
|
||||||
|
cls._validate(container_name)
|
||||||
|
if not isinstance(container_name, bytes):
|
||||||
|
container_name = container_name.encode('utf-8')
|
||||||
|
hash = md5(container_name, usedforsecurity=False).hexdigest()
|
||||||
|
return hash
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def create(cls, account, root_container, parent_container,
|
||||||
|
timestamp, index):
|
||||||
|
"""
|
||||||
|
Create an instance of :class:`~swift.common.utils.ShardName`.
|
||||||
|
|
||||||
|
:param account: the hidden internal account to which the shard
|
||||||
|
container belongs.
|
||||||
|
:param root_container: the name of the root container for the shard.
|
||||||
|
:param parent_container: the name of the parent container for the
|
||||||
|
shard.
|
||||||
|
:param timestamp: an instance of :class:`~swift.common.utils.Timestamp`
|
||||||
|
:param index: a unique index that will distinguish the path from any
|
||||||
|
other path generated using the same combination of
|
||||||
|
``account``, ``root_container``, ``parent_container`` and
|
||||||
|
``timestamp``.
|
||||||
|
|
||||||
|
:return: an instance of :class:`~swift.common.utils.ShardName`.
|
||||||
|
:raises ValueError: if any argument is None
|
||||||
|
"""
|
||||||
|
# we make the shard name unique with respect to other shards names by
|
||||||
|
# embedding a hash of the parent container name; we use a hash (rather
|
||||||
|
# than the actual parent container name) to prevent shard names become
|
||||||
|
# longer with every generation.
|
||||||
|
parent_container_hash = cls.hash_container_name(parent_container)
|
||||||
|
return cls(account, root_container, parent_container_hash, timestamp,
|
||||||
|
index)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def parse(cls, name):
|
||||||
|
"""
|
||||||
|
Parse ``name`` to an instance of
|
||||||
|
:class:`~swift.common.utils.ShardName`.
|
||||||
|
|
||||||
|
:param name: a shard name which should have the form:
|
||||||
|
<account>/
|
||||||
|
<root_container>-<parent_container_hash>-<timestamp>-<index>
|
||||||
|
|
||||||
|
:return: an instance of :class:`~swift.common.utils.ShardName`.
|
||||||
|
:raises ValueError: if ``name`` is not a valid shard name.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
account, container = name.split('/', 1)
|
||||||
|
root_container, parent_container_hash, timestamp, index = \
|
||||||
|
container.rsplit('-', 3)
|
||||||
|
return cls(account, root_container, parent_container_hash,
|
||||||
|
timestamp, index)
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError('invalid name: %s' % name)
|
||||||
|
|
||||||
|
|
||||||
class ShardRange(object):
|
class ShardRange(object):
|
||||||
"""
|
"""
|
||||||
A ShardRange encapsulates sharding state related to a container including
|
A ShardRange encapsulates sharding state related to a container including
|
||||||
@@ -5276,16 +5381,38 @@ class ShardRange(object):
|
|||||||
raise TypeError('must be a string type')
|
raise TypeError('must be a string type')
|
||||||
return self._encode(bound)
|
return self._encode(bound)
|
||||||
|
|
||||||
@classmethod
|
def is_child_of(self, parent):
|
||||||
def _make_container_name(cls, root_container, parent_container, timestamp,
|
"""
|
||||||
index):
|
Test if this shard range is a child of another shard range. The
|
||||||
if not isinstance(parent_container, bytes):
|
parent-child relationship is inferred from the names of the shard
|
||||||
parent_container = parent_container.encode('utf-8')
|
ranges. This method is limited to work only within the scope of the
|
||||||
return "%s-%s-%s-%s" % (root_container,
|
same account.
|
||||||
md5(parent_container,
|
|
||||||
usedforsecurity=False).hexdigest(),
|
:param parent: an instance of ``ShardRange``.
|
||||||
cls._to_timestamp(timestamp).internal,
|
:return: True if ``parent`` is the parent of this shard range, False
|
||||||
index)
|
otherwise, assuming that they are within the same account.
|
||||||
|
"""
|
||||||
|
# note: We limit the usages of this method to be within the same
|
||||||
|
# account, because account shard prefix is configurable and it's hard
|
||||||
|
# to perform checking without breaking backward-compatibility.
|
||||||
|
try:
|
||||||
|
self_parsed_name = ShardName.parse(self.name)
|
||||||
|
except ValueError:
|
||||||
|
# self is not a shard and therefore not a child.
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
parsed_parent_name = ShardName.parse(parent.name)
|
||||||
|
parent_root_container = parsed_parent_name.root_container
|
||||||
|
except ValueError:
|
||||||
|
# parent is a root container.
|
||||||
|
parent_root_container = parent.container
|
||||||
|
|
||||||
|
return (
|
||||||
|
self_parsed_name.root_container == parent_root_container
|
||||||
|
and self_parsed_name.parent_container_hash
|
||||||
|
== ShardName.hash_container_name(parent.container)
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def make_path(cls, shards_account, root_container, parent_container,
|
def make_path(cls, shards_account, root_container, parent_container,
|
||||||
@@ -5308,9 +5435,12 @@ class ShardRange(object):
|
|||||||
``timestamp``.
|
``timestamp``.
|
||||||
:return: a string of the form <account_name>/<container_name>
|
:return: a string of the form <account_name>/<container_name>
|
||||||
"""
|
"""
|
||||||
shard_container = cls._make_container_name(
|
timestamp = cls._to_timestamp(timestamp)
|
||||||
root_container, parent_container, timestamp, index)
|
return str(ShardName.create(shards_account,
|
||||||
return '%s/%s' % (shards_account, shard_container)
|
root_container,
|
||||||
|
parent_container,
|
||||||
|
timestamp,
|
||||||
|
index))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _to_timestamp(cls, timestamp):
|
def _to_timestamp(cls, timestamp):
|
||||||
|
|||||||
@@ -7946,6 +7946,57 @@ class TestDistributeEvenly(unittest.TestCase):
|
|||||||
self.assertEqual(out, [[0], [1], [2], [3], [4], [], []])
|
self.assertEqual(out, [[0], [1], [2], [3], [4], [], []])
|
||||||
|
|
||||||
|
|
||||||
|
class TestShardName(unittest.TestCase):
|
||||||
|
def test(self):
|
||||||
|
ts = utils.Timestamp.now()
|
||||||
|
created = utils.ShardName.create('a', 'root', 'parent', ts, 1)
|
||||||
|
parent_hash = md5(b'parent', usedforsecurity=False).hexdigest()
|
||||||
|
expected = 'a/root-%s-%s-1' % (parent_hash, ts.internal)
|
||||||
|
actual = str(created)
|
||||||
|
self.assertEqual(expected, actual)
|
||||||
|
parsed = utils.ShardName.parse(actual)
|
||||||
|
self.assertEqual('a', parsed.account)
|
||||||
|
self.assertEqual('root', parsed.root_container)
|
||||||
|
self.assertEqual(parent_hash, parsed.parent_container_hash)
|
||||||
|
self.assertEqual(ts, parsed.timestamp)
|
||||||
|
self.assertEqual(1, parsed.index)
|
||||||
|
self.assertEqual(actual, str(parsed))
|
||||||
|
|
||||||
|
def test_root_has_hyphens(self):
|
||||||
|
parsed = utils.ShardName.parse(
|
||||||
|
'a/root-has-some-hyphens-hash-1234-99')
|
||||||
|
self.assertEqual('a', parsed.account)
|
||||||
|
self.assertEqual('root-has-some-hyphens', parsed.root_container)
|
||||||
|
self.assertEqual('hash', parsed.parent_container_hash)
|
||||||
|
self.assertEqual(utils.Timestamp(1234), parsed.timestamp)
|
||||||
|
self.assertEqual(99, parsed.index)
|
||||||
|
|
||||||
|
def test_bad_parse(self):
|
||||||
|
with self.assertRaises(ValueError) as cm:
|
||||||
|
utils.ShardName.parse('a')
|
||||||
|
self.assertEqual('invalid name: a', str(cm.exception))
|
||||||
|
with self.assertRaises(ValueError) as cm:
|
||||||
|
utils.ShardName.parse('a/c')
|
||||||
|
self.assertEqual('invalid name: a/c', str(cm.exception))
|
||||||
|
with self.assertRaises(ValueError) as cm:
|
||||||
|
utils.ShardName.parse('a/root-hash-bad')
|
||||||
|
self.assertEqual('invalid name: a/root-hash-bad', str(cm.exception))
|
||||||
|
with self.assertRaises(ValueError) as cm:
|
||||||
|
utils.ShardName.parse('a/root-hash-bad-0')
|
||||||
|
self.assertEqual('invalid name: a/root-hash-bad-0',
|
||||||
|
str(cm.exception))
|
||||||
|
with self.assertRaises(ValueError) as cm:
|
||||||
|
utils.ShardName.parse('a/root-hash-12345678.12345-bad')
|
||||||
|
self.assertEqual('invalid name: a/root-hash-12345678.12345-bad',
|
||||||
|
str(cm.exception))
|
||||||
|
|
||||||
|
def test_bad_create(self):
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
utils.ShardName.create('a', 'root', 'hash', 'bad', '0')
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
utils.ShardName.create('a', 'root', None, '1235678', 'bad')
|
||||||
|
|
||||||
|
|
||||||
class TestShardRange(unittest.TestCase):
|
class TestShardRange(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.ts_iter = make_timestamp_iter()
|
self.ts_iter = make_timestamp_iter()
|
||||||
@@ -8878,9 +8929,115 @@ class TestShardRange(unittest.TestCase):
|
|||||||
actual = utils.ShardRange.make_path(
|
actual = utils.ShardRange.make_path(
|
||||||
'a', 'root', 'parent', ts.internal, '3')
|
'a', 'root', 'parent', ts.internal, '3')
|
||||||
self.assertEqual('a/root-%s-%s-3' % (parent_hash, ts.internal), actual)
|
self.assertEqual('a/root-%s-%s-3' % (parent_hash, ts.internal), actual)
|
||||||
actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, 'foo')
|
|
||||||
self.assertEqual('a/root-%s-%s-foo' % (parent_hash, ts.internal),
|
def test_is_child_of(self):
|
||||||
actual)
|
# Set up some shard ranges in relational hierarchy:
|
||||||
|
# account -> root -> grandparent -> parent -> child
|
||||||
|
# using abbreviated names a_r_gp_p_c
|
||||||
|
|
||||||
|
# account 1
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a1_r1 = utils.ShardRange('a1/r1', ts)
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a1_r1_gp1 = utils.ShardRange(utils.ShardRange.make_path(
|
||||||
|
'.shards_a1', 'r1', 'r1', ts, 1), ts)
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a1_r1_gp1_p1 = utils.ShardRange(utils.ShardRange.make_path(
|
||||||
|
'.shards_a1', 'r1', a1_r1_gp1.container, ts, 1), ts)
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a1_r1_gp1_p1_c1 = utils.ShardRange(utils.ShardRange.make_path(
|
||||||
|
'.shards_a1', 'r1', a1_r1_gp1_p1.container, ts, 1), ts)
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a1_r1_gp1_p1_c2 = utils.ShardRange(utils.ShardRange.make_path(
|
||||||
|
'.shards_a1', 'r1', a1_r1_gp1_p1.container, ts, 2), ts)
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a1_r1_gp1_p2 = utils.ShardRange(utils.ShardRange.make_path(
|
||||||
|
'.shards_a1', 'r1', a1_r1_gp1.container, ts, 2), ts)
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a1_r1_gp2 = utils.ShardRange(utils.ShardRange.make_path(
|
||||||
|
'.shards_a1', 'r1', 'r1', ts, 2), ts) # different index
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a1_r1_gp2_p1 = utils.ShardRange(utils.ShardRange.make_path(
|
||||||
|
'.shards_a1', 'r1', a1_r1_gp2.container, ts, 1), ts)
|
||||||
|
# drop the index from grandparent name
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
rogue_a1_r1_gp = utils.ShardRange(utils.ShardRange.make_path(
|
||||||
|
'.shards_a1', 'r1', 'r1', ts, 1)[:-2], ts)
|
||||||
|
|
||||||
|
# account 1, root 2
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a1_r2 = utils.ShardRange('a1/r2', ts)
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a1_r2_gp1 = utils.ShardRange(utils.ShardRange.make_path(
|
||||||
|
'.shards_a1', 'r2', a1_r2.container, ts, 1), ts)
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a1_r2_gp1_p1 = utils.ShardRange(utils.ShardRange.make_path(
|
||||||
|
'.shards_a1', 'r2', a1_r2_gp1.container, ts, 3), ts)
|
||||||
|
|
||||||
|
# account 2, root1
|
||||||
|
a2_r1 = utils.ShardRange('a2/r1', ts)
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a2_r1_gp1 = utils.ShardRange(utils.ShardRange.make_path(
|
||||||
|
'.shards_a2', 'r1', a2_r1.container, ts, 1), ts)
|
||||||
|
ts = next(self.ts_iter)
|
||||||
|
a2_r1_gp1_p1 = utils.ShardRange(utils.ShardRange.make_path(
|
||||||
|
'.shards_a2', 'r1', a2_r1_gp1.container, ts, 3), ts)
|
||||||
|
|
||||||
|
# verify parent-child within same account.
|
||||||
|
self.assertTrue(a1_r1_gp1.is_child_of(a1_r1))
|
||||||
|
self.assertTrue(a1_r1_gp1_p1.is_child_of(a1_r1_gp1))
|
||||||
|
self.assertTrue(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1_p1))
|
||||||
|
self.assertTrue(a1_r1_gp1_p1_c2.is_child_of(a1_r1_gp1_p1))
|
||||||
|
self.assertTrue(a1_r1_gp1_p2.is_child_of(a1_r1_gp1))
|
||||||
|
|
||||||
|
self.assertTrue(a1_r1_gp2.is_child_of(a1_r1))
|
||||||
|
self.assertTrue(a1_r1_gp2_p1.is_child_of(a1_r1_gp2))
|
||||||
|
|
||||||
|
self.assertTrue(a1_r2_gp1.is_child_of(a1_r2))
|
||||||
|
self.assertTrue(a1_r2_gp1_p1.is_child_of(a1_r2_gp1))
|
||||||
|
|
||||||
|
self.assertTrue(a2_r1_gp1.is_child_of(a2_r1))
|
||||||
|
self.assertTrue(a2_r1_gp1_p1.is_child_of(a2_r1_gp1))
|
||||||
|
|
||||||
|
# verify not parent-child within same account.
|
||||||
|
self.assertFalse(a1_r1.is_child_of(a1_r1))
|
||||||
|
self.assertFalse(a1_r1.is_child_of(a1_r2))
|
||||||
|
|
||||||
|
self.assertFalse(a1_r1_gp1.is_child_of(a1_r2))
|
||||||
|
self.assertFalse(a1_r1_gp1.is_child_of(a1_r1_gp1))
|
||||||
|
self.assertFalse(a1_r1_gp1.is_child_of(a1_r1_gp1_p1))
|
||||||
|
self.assertFalse(a1_r1_gp1.is_child_of(a1_r1_gp1_p1_c1))
|
||||||
|
|
||||||
|
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r2))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp2))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r2_gp1))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1.is_child_of(rogue_a1_r1_gp))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp1_p1))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp1_p2))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r2_gp1_p1))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp1_p1_c1))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp1_p1_c2))
|
||||||
|
|
||||||
|
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1_p2))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp2_p1))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1_p1_c1))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1_p1_c2))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r2_gp1_p1))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a2_r1_gp1_p1))
|
||||||
|
|
||||||
|
self.assertFalse(a1_r2_gp1.is_child_of(a1_r1))
|
||||||
|
self.assertFalse(a1_r2_gp1_p1.is_child_of(a1_r1_gp1))
|
||||||
|
|
||||||
|
# across different accounts, 'is_child_of' works in some cases but not
|
||||||
|
# all, so don't use it for shard ranges in different accounts.
|
||||||
|
self.assertFalse(a1_r1.is_child_of(a2_r1))
|
||||||
|
self.assertFalse(a2_r1_gp1_p1.is_child_of(a1_r1_gp1))
|
||||||
|
self.assertFalse(a1_r1_gp1_p1.is_child_of(a2_r1))
|
||||||
|
self.assertTrue(a1_r1_gp1.is_child_of(a2_r1))
|
||||||
|
self.assertTrue(a2_r1_gp1.is_child_of(a1_r1))
|
||||||
|
|
||||||
def test_expand(self):
|
def test_expand(self):
|
||||||
bounds = (('', 'd'), ('d', 'k'), ('k', 't'), ('t', ''))
|
bounds = (('', 'd'), ('d', 'k'), ('k', 't'), ('t', ''))
|
||||||
|
|||||||
Reference in New Issue
Block a user