container-server: plumb includes down into _get_shard_range_rows

The container server performs a shard range lookup for every object
update PUT that has an 'x-backend-accept-redirect' header. The lookup
is performed using the ContainerBroker.get_shard_ranges() method with
an 'includes' argument equal to the object's name. The implementation
of this method previously performed an SQL query to select *all* shard
ranges that are in one of the updating states. The results were then
filtered in python to find the shard range that includes the object
name.

Benchmarking found that, for container DBs with O(1000) shard ranges,
object updates with an 'x-backend-accept-redirect' header could be
between 20 to 50 slower than those without, depending on the age and
history of the DB.

This patch adds the 'includes object name' constraint to the SQL
select query, which now returns only shard ranges that include the
object name. The change has been observed to reduce the shard range
lookup overhead due to an 'x-backend-accept-redirect' header by a
factor of approximately 10.

Change-Id: Ie40b62432dd9ccfd314861655f7972a2123938fe
This commit is contained in:
Tim Burke
2018-05-21 11:47:57 -07:00
parent a669a75987
commit 185b11e2fd
2 changed files with 101 additions and 10 deletions

View File

@@ -1665,9 +1665,9 @@ class ContainerBroker(DatabaseBroker):
if ('no such table: %s' % SHARD_RANGE_TABLE) not in str(err):
raise
def _get_shard_range_rows(self, connection=None, include_deleted=False,
states=None, include_own=False,
exclude_others=False):
def _get_shard_range_rows(self, connection=None, includes=None,
include_deleted=False, states=None,
include_own=False, exclude_others=False):
"""
Returns a list of shard range rows.
@@ -1676,6 +1676,8 @@ class ContainerBroker(DatabaseBroker):
``exclude_others=True``.
:param connection: db connection
:param includes: restricts the returned list to the shard range that
includes the given value
:param include_deleted: include rows marked as deleted
:param states: include only rows matching the given state(s); can be an
int or a list of ints.
@@ -1719,6 +1721,9 @@ class ContainerBroker(DatabaseBroker):
if exclude_others:
conditions.append('name = ?')
params.append(self.path)
if includes is not None:
conditions.extend(('lower < ?', "(upper = '' OR upper >= ?)"))
params.extend((includes, includes))
if conditions:
condition = ' WHERE ' + ' AND '.join(conditions)
columns = SHARD_RANGE_KEYS[:-2]
@@ -1833,16 +1838,18 @@ class ContainerBroker(DatabaseBroker):
shard_ranges = [
ShardRange(*row)
for row in self._get_shard_range_rows(
include_deleted=include_deleted, states=states,
include_own=include_own,
includes=includes, include_deleted=include_deleted,
states=states, include_own=include_own,
exclude_others=exclude_others)]
shard_ranges.sort(key=ShardRange.sort_key)
if includes:
return shard_ranges[:1] if shard_ranges else []
shard_ranges = filter_shard_ranges(shard_ranges, includes,
marker, end_marker)
if not includes and fill_gaps:
if fill_gaps:
own_shard_range = self._own_shard_range()
if shard_ranges:
last_upper = shard_ranges[-1].upper

View File

@@ -4056,9 +4056,22 @@ class TestContainerBroker(unittest.TestCase):
actual = broker.get_shard_ranges(marker='e', end_marker='e')
self.assertFalse([dict(sr) for sr in actual])
orig_execute = GreenDBConnection.execute
mock_call_args = []
def mock_execute(*args, **kwargs):
mock_call_args.append(args)
return orig_execute(*args, **kwargs)
with mock.patch('swift.common.db.GreenDBConnection.execute',
mock_execute):
actual = broker.get_shard_ranges(includes='f')
self.assertEqual([dict(sr) for sr in shard_ranges[2:3]],
[dict(sr) for sr in actual])
self.assertEqual(1, len(mock_call_args))
# verify that includes keyword plumbs through to an SQL condition
self.assertIn("WHERE deleted=0 AND name != ? AND lower < ? AND "
"(upper = '' OR upper >= ?)", mock_call_args[0][1])
actual = broker.get_shard_ranges(includes='i')
self.assertFalse(actual)
@@ -4115,6 +4128,61 @@ class TestContainerBroker(unittest.TestCase):
include_own=False, exclude_others=True)
self.assertFalse(actual)
@with_tempdir
def test_get_shard_ranges_includes(self, tempdir):
ts = next(self.ts)
start = ShardRange('a/-a', ts, '', 'a')
atof = ShardRange('a/a-f', ts, 'a', 'f')
ftol = ShardRange('a/f-l', ts, 'f', 'l')
ltor = ShardRange('a/l-r', ts, 'l', 'r')
rtoz = ShardRange('a/r-z', ts, 'r', 'z')
end = ShardRange('a/z-', ts, 'z', '')
ranges = [start, atof, ftol, ltor, rtoz, end]
db_path = os.path.join(tempdir, 'container.db')
broker = ContainerBroker(db_path, account='a', container='c')
broker.initialize(next(self.ts).internal, 0)
broker.merge_shard_ranges(ranges)
actual = broker.get_shard_ranges(includes='')
self.assertEqual(actual, [])
actual = broker.get_shard_ranges(includes=' ')
self.assertEqual(actual, [start])
actual = broker.get_shard_ranges(includes='b')
self.assertEqual(actual, [atof])
actual = broker.get_shard_ranges(includes='f')
self.assertEqual(actual, [atof])
actual = broker.get_shard_ranges(includes='f\x00')
self.assertEqual(actual, [ftol])
actual = broker.get_shard_ranges(includes='x')
self.assertEqual(actual, [rtoz])
actual = broker.get_shard_ranges(includes='r')
self.assertEqual(actual, [ltor])
actual = broker.get_shard_ranges(includes='}')
self.assertEqual(actual, [end])
# add some overlapping sub-shards
ftoh = ShardRange('a/f-h', ts, 'f', 'h')
htok = ShardRange('a/h-k', ts, 'h', 'k')
broker.merge_shard_ranges([ftoh, htok])
actual = broker.get_shard_ranges(includes='g')
self.assertEqual(actual, [ftoh])
actual = broker.get_shard_ranges(includes='h')
self.assertEqual(actual, [ftoh])
actual = broker.get_shard_ranges(includes='k')
self.assertEqual(actual, [htok])
actual = broker.get_shard_ranges(includes='l')
self.assertEqual(actual, [ftol])
actual = broker.get_shard_ranges(includes='m')
self.assertEqual(actual, [ltor])
# remove l-r from shard ranges and try and find a shard range for an
# item in that range.
ltor.set_deleted(next(self.ts))
broker.merge_shard_ranges([ltor])
actual = broker.get_shard_ranges(includes='p')
self.assertEqual(actual, [])
@with_tempdir
def test_overlap_shard_range_order(self, tempdir):
db_path = os.path.join(tempdir, 'container.db')
@@ -4184,9 +4252,25 @@ class TestContainerBroker(unittest.TestCase):
[dict(sr) for sr in shard_ranges[:3] + shard_ranges[4:]],
[dict(sr) for sr in actual])
orig_execute = GreenDBConnection.execute
mock_call_args = []
def mock_execute(*args, **kwargs):
mock_call_args.append(args)
return orig_execute(*args, **kwargs)
with mock.patch('swift.common.db.GreenDBConnection.execute',
mock_execute):
actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES,
includes='e')
self.assertEqual([shard_ranges[1]], actual)
self.assertEqual([dict(shard_ranges[1])],
[dict(sr) for sr in actual])
self.assertEqual(1, len(mock_call_args))
# verify that includes keyword plumbs through to an SQL condition
self.assertIn("WHERE deleted=0 AND state in (?,?,?,?) AND name != ? "
"AND lower < ? AND (upper = '' OR upper >= ?)",
mock_call_args[0][1])
actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES,
includes='j')
self.assertEqual([shard_ranges[2]], actual)