container-server: use LIMIT in get_own_shard_range() query

The get_own_shard_range() method is called during every container GET
or HEAD request (as well as at other times). The method delegates to
get_shard_ranges() which queries the DB for shard ranges. Although the
query has conditions to only select a single shard range name, and the
method will return only the first matching shard range, the query had
no LIMIT. Adding a LIMIT of 1 significantly reduces execution time
when the DB has many shard range rows.

On the author's machine, using a DB with ~12000 shard ranges this
patch reduces the get_own_shard_range() execution time by a factor of
approximately 100.

Change-Id: I43f79de3f0603b9fab8bf6f7b4c3b1892a8919b3
This commit is contained in:
Alistair Coles 2023-07-14 14:24:33 +01:00 committed by Jianjian Huo
parent 55cd675f8b
commit 6e8d82c97e
2 changed files with 88 additions and 9 deletions

View File

@ -1688,7 +1688,8 @@ class ContainerBroker(DatabaseBroker):
def _get_shard_range_rows(self, connection=None, marker=None,
end_marker=None, includes=None,
include_deleted=False, states=None,
include_own=False, exclude_others=False):
include_own=False, exclude_others=False,
limit=None):
"""
Returns a list of shard range rows.
@ -1717,6 +1718,14 @@ class ContainerBroker(DatabaseBroker):
names do not match the broker's path are included in the returned
list. If True, those rows are not included, otherwise they are
included. Default is False.
:param limit: restricts the returned list to the given number of rows.
Should be a whole number; negative values will be ignored.
The ``limit`` parameter is useful to optimise a search
when the maximum number of expected matching rows is known, and
particularly when that maximum number is much less than the total
number of rows in the DB. However, the DB search is not ordered and
the subset of rows returned when ``limit`` is less than all
possible matching rows is therefore unpredictable.
:return: a list of tuples.
"""
@ -1761,6 +1770,8 @@ class ContainerBroker(DatabaseBroker):
params.extend((includes, includes))
if conditions:
condition = ' WHERE ' + ' AND '.join(conditions)
if limit is not None and limit >= 0:
condition += ' LIMIT %d' % limit
columns = SHARD_RANGE_KEYS[:-2]
for column in SHARD_RANGE_KEYS[-2:]:
if column in defaults:
@ -1834,8 +1845,8 @@ class ContainerBroker(DatabaseBroker):
def get_shard_ranges(self, marker=None, end_marker=None, includes=None,
reverse=False, include_deleted=False, states=None,
include_own=False,
exclude_others=False, fill_gaps=False):
include_own=False, exclude_others=False,
fill_gaps=False):
"""
Returns a list of persisted shard ranges.
@ -1923,13 +1934,13 @@ class ContainerBroker(DatabaseBroker):
default shard range is returned.
:return: an instance of :class:`~swift.common.utils.ShardRange`
"""
shard_ranges = self.get_shard_ranges(include_own=True,
include_deleted=True,
exclude_others=True)
if shard_ranges:
own_shard_range = shard_ranges[0]
rows = self._get_shard_range_rows(
include_own=True, include_deleted=True, exclude_others=True,
limit=1)
if rows:
own_shard_range = ShardRange(*rows[0])
elif no_default:
return None
own_shard_range = None
else:
own_shard_range = ShardRange(
self.path, Timestamp.now(), ShardRange.MIN, ShardRange.MAX,

View File

@ -4622,6 +4622,58 @@ class TestContainerBroker(test_db.TestDbBase):
includes='l')
self.assertEqual([shard_ranges[2]], actual)
@with_tempdir
def test_get_shard_range_rows_with_limit(self, tempdir):
db_path = os.path.join(tempdir, 'container.db')
broker = ContainerBroker(db_path, account='a', container='c')
broker.initialize(next(self.ts).internal, 0)
shard_ranges = [
ShardRange('a/c', next(self.ts), 'a', 'c'),
ShardRange('.a/c1', next(self.ts), 'c', 'd'),
ShardRange('.a/c2', next(self.ts), 'd', 'f'),
ShardRange('.a/c3', next(self.ts), 'd', 'f', deleted=1),
]
broker.merge_shard_ranges(shard_ranges)
actual = broker._get_shard_range_rows(include_deleted=True,
include_own=True)
self.assertEqual(4, len(actual))
# the order of rows is not predictable, but they should be unique
self.assertEqual(4, len(set(actual)))
actual = broker._get_shard_range_rows(include_deleted=True)
self.assertEqual(3, len(actual))
self.assertEqual(3, len(set(actual)))
# negative -> unlimited
actual = broker._get_shard_range_rows(include_deleted=True, limit=-1)
self.assertEqual(3, len(actual))
self.assertEqual(3, len(set(actual)))
# zero is applied
actual = broker._get_shard_range_rows(include_deleted=True, limit=0)
self.assertFalse(actual)
actual = broker._get_shard_range_rows(include_deleted=True, limit=1)
self.assertEqual(1, len(actual))
self.assertEqual(1, len(set(actual)))
actual = broker._get_shard_range_rows(include_deleted=True, limit=2)
self.assertEqual(2, len(actual))
self.assertEqual(2, len(set(actual)))
actual = broker._get_shard_range_rows(include_deleted=True, limit=3)
self.assertEqual(3, len(actual))
self.assertEqual(3, len(set(actual)))
actual = broker._get_shard_range_rows(include_deleted=True, limit=4)
self.assertEqual(3, len(actual))
self.assertEqual(3, len(set(actual)))
actual = broker._get_shard_range_rows(include_deleted=True,
include_own=True,
exclude_others=True,
limit=1)
self.assertEqual(1, len(actual))
self.assertEqual(shard_ranges[0], ShardRange(*actual[0]))
actual = broker._get_shard_range_rows(include_deleted=True,
include_own=True,
exclude_others=True,
limit=4)
self.assertEqual(1, len(actual))
self.assertEqual(shard_ranges[0], ShardRange(*actual[0]))
@with_tempdir
def test_get_own_shard_range(self, tempdir):
db_path = os.path.join(tempdir, 'container.db')
@ -4688,6 +4740,22 @@ class TestContainerBroker(test_db.TestDbBase):
actual = broker.get_own_shard_range()
self.assertEqual(dict(own_sr), dict(actual))
orig_execute = GreenDBConnection.execute
mock_call_args = []
def mock_execute(*args, **kwargs):
mock_call_args.append(args)
return orig_execute(*args, **kwargs)
with mock.patch('swift.common.db.GreenDBConnection.execute',
mock_execute):
actual = broker.get_own_shard_range()
self.assertEqual(dict(own_sr), dict(actual))
self.assertEqual(1, len(mock_call_args))
# verify that SQL is optimised with LIMIT
self.assertIn("WHERE name = ? LIMIT 1", mock_call_args[0][1])
self.assertEqual(['.shards_a/shard_c'], mock_call_args[0][2])
@with_tempdir
def test_enable_sharding(self, tempdir):
db_path = os.path.join(tempdir, 'container.db')