Add generic fuzzy matching logic to the database layer

share snap list, Fuzzy query by name or description is supported, but
the current implementation is first get all the shares, then In the API
layer, the for loop is used to achieve fuzzy matching, if the num of
shares is big,
It will seriously affect the speed of fuzzy matching.Therefore, we
should let the database do the matching to speed up the query.
Moving the pagination params (limit, offset, sorting) to the database
layer for snapshot list, to optimize query speed.

Closes-Bug:#1881865
Partial-Bug:#1831094

Change-Id: I283e78c9e7c2dd626d94cf6c1b01d4e2f9ae8097
This commit is contained in:
haixin 2020-06-08 17:08:51 +08:00
parent 4bfea794d7
commit e032c13bfb
10 changed files with 194 additions and 105 deletions

View File

@ -91,12 +91,18 @@ class ShareSnapshotMixin(object):
search_opts = {}
search_opts.update(req.GET)
params = common.get_pagination_params(req)
limit, offset = [params.get('limit'), params.get('offset')]
# Remove keys that are not related to share attrs
search_opts.pop('limit', None)
search_opts.pop('offset', None)
sort_key = search_opts.pop('sort_key', 'created_at')
sort_dir = search_opts.pop('sort_dir', 'desc')
sort_key, sort_dir = common.get_sort_params(search_opts)
key_dict = {"name": "display_name",
"description": "display_description"}
for key in key_dict:
if sort_key == key:
sort_key = key_dict[key]
# NOTE(vponomaryov): Manila stores in DB key 'display_name', but
# allows to use both keys 'name' and 'display_name'. It is leftover
@ -119,19 +125,16 @@ class ShareSnapshotMixin(object):
snapshots = self.share_api.get_all_snapshots(
context,
search_opts=search_opts,
limit=limit,
offset=offset,
sort_key=sort_key,
sort_dir=sort_dir,
)
# Snapshots with no instances are filtered out.
snapshots = list(filter(lambda x: x.get('status') is not None,
snapshots))
limited_list = common.limited(snapshots, req)
if is_detail:
snapshots = self._view_builder.detail_list(req, limited_list)
snapshots = self._view_builder.detail_list(req, snapshots)
else:
snapshots = self._view_builder.summary_list(req, limited_list)
snapshots = self._view_builder.summary_list(req, snapshots)
return snapshots
def _get_snapshots_search_options(self):

View File

@ -604,21 +604,21 @@ def share_snapshot_get(context, snapshot_id):
return IMPL.share_snapshot_get(context, snapshot_id)
def share_snapshot_get_all(context, filters=None, sort_key=None,
sort_dir=None):
def share_snapshot_get_all(context, filters=None, limit=None, offset=None,
sort_key=None, sort_dir=None):
"""Get all snapshots."""
return IMPL.share_snapshot_get_all(
context, filters=filters, sort_key=sort_key, sort_dir=sort_dir,
)
context, filters=filters, limit=limit, offset=offset,
sort_key=sort_key, sort_dir=sort_dir)
def share_snapshot_get_all_by_project(context, project_id, filters=None,
sort_key=None, sort_dir=None):
limit=None, offset=None, sort_key=None,
sort_dir=None):
"""Get all snapshots belonging to a project."""
return IMPL.share_snapshot_get_all_by_project(
context, project_id, filters=filters, sort_key=sort_key,
sort_dir=sort_dir,
)
context, project_id, filters=filters, limit=limit, offset=offset,
sort_key=sort_key, sort_dir=sort_dir)
def share_snapshot_get_all_for_share(context, share_id, filters=None,

View File

@ -274,6 +274,50 @@ def model_query(context, model, *args, **kwargs):
model=model, session=session, args=args, **kwargs)
def _process_model_like_filter(model, query, filters):
"""Applies regex expression filtering to a query.
:param model: model to apply filters to
:param query: query to apply filters to
:param filters: dictionary of filters with regex values
:returns: the updated query.
"""
if query is None:
return query
if filters:
for key in sorted(filters):
column_attr = getattr(model, key)
if 'property' == type(column_attr).__name__:
continue
value = filters[key]
if not (isinstance(value, (str, int))):
continue
query = query.filter(
column_attr.op('LIKE')(u'%%%s%%' % value))
return query
def apply_like_filters(process_exact_filters):
def _decorator(query, model, filters, legal_keys):
exact_filters = filters.copy()
regex_filters = {}
for key, value in filters.items():
if key not in legal_keys:
# Skip ones we're not filtering on
continue
# NOTE(haixin): For inexact match, the filter keys
# are in the format of 'key~=value'
if key.endswith('~'):
exact_filters.pop(key)
regex_filters[key.rstrip('~')] = value
query = process_exact_filters(query, model, exact_filters,
legal_keys)
return _process_model_like_filter(model, query, regex_filters)
return _decorator
@apply_like_filters
def exact_filter(query, model, filters, legal_keys,
created_at_key='created_at'):
"""Applies exact match filtering to a query.
@ -2865,11 +2909,25 @@ def share_snapshot_get(context, snapshot_id, session=None):
def _share_snapshot_get_all_with_filters(context, project_id=None,
share_id=None, filters=None,
limit=None, offset=None,
sort_key=None, sort_dir=None):
"""Retrieves all snapshots.
If no sorting parameters are specified then returned snapshots are sorted
by the 'created_at' key and desc order.
:param context: context to query under
:param filters: dictionary of filters
:param limit: maximum number of items to return
:param sort_key: attribute by which results should be sorted,default is
created_at
:param sort_dir: direction in which results should be sorted
:returns: list of matching snapshots
"""
# Init data
sort_key = sort_key or 'share_id'
sort_key = sort_key or 'created_at'
sort_dir = sort_dir or 'desc'
filters = filters or {}
filters = copy.deepcopy(filters) if filters else {}
query = model_query(context, models.ShareSnapshot)
if project_id:
@ -2879,60 +2937,65 @@ def _share_snapshot_get_all_with_filters(context, project_id=None,
query = query.options(joinedload('share'))
query = query.options(joinedload('instances'))
# Snapshots with no instances are filtered out.
query = query.filter(
models.ShareSnapshot.id == models.ShareSnapshotInstance.snapshot_id)
# Apply filters
if 'usage' in filters:
usage_filter_keys = ['any', 'used', 'unused']
if filters['usage'] == 'any':
pass
elif filters['usage'] == 'used':
query = query.filter(or_(models.Share.snapshot_id == (
models.ShareSnapshot.id)))
query = query.filter(models.Share.snapshot_id == (
models.ShareSnapshot.id))
elif filters['usage'] == 'unused':
query = query.filter(or_(models.Share.snapshot_id != (
models.ShareSnapshot.id)))
query = query.filter(models.Share.snapshot_id != (
models.ShareSnapshot.id))
else:
msg = _("Wrong 'usage' key provided - '%(key)s'. "
"Expected keys are '%(ek)s'.") % {
'key': filters['usage'],
'ek': usage_filter_keys}
raise exception.InvalidInput(reason=msg)
filters.pop('usage')
if 'status' in filters:
query = query.filter(models.ShareSnapshotInstance.status == (
filters['status']))
filters.pop('status')
# Apply sorting
try:
attr = getattr(models.ShareSnapshot, sort_key)
except AttributeError:
msg = _("Wrong sorting key provided - '%s'.") % sort_key
raise exception.InvalidInput(reason=msg)
if sort_dir.lower() == 'desc':
query = query.order_by(attr.desc())
elif sort_dir.lower() == 'asc':
query = query.order_by(attr.asc())
else:
msg = _("Wrong sorting data provided: sort key is '%(sort_key)s' "
"and sort direction is '%(sort_dir)s'.") % {
"sort_key": sort_key, "sort_dir": sort_dir}
raise exception.InvalidInput(reason=msg)
legal_filter_keys = ('display_name', 'display_name~',
'display_description', 'display_description~',
'id', 'user_id', 'project_id', 'share_id',
'share_proto', 'size', 'share_size')
query = exact_filter(query, models.ShareSnapshot,
filters, legal_filter_keys)
query = utils.paginate_query(query, models.ShareSnapshot, limit,
sort_key=sort_key,
sort_dir=sort_dir,
offset=offset)
# Returns list of shares that satisfy filters
return query.all()
@require_admin_context
def share_snapshot_get_all(context, filters=None, sort_key=None,
sort_dir=None):
def share_snapshot_get_all(context, filters=None, limit=None, offset=None,
sort_key=None, sort_dir=None):
return _share_snapshot_get_all_with_filters(
context, filters=filters, sort_key=sort_key, sort_dir=sort_dir,
)
context, filters=filters, limit=limit,
offset=offset, sort_key=sort_key, sort_dir=sort_dir)
@require_context
def share_snapshot_get_all_by_project(context, project_id, filters=None,
limit=None, offset=None,
sort_key=None, sort_dir=None):
authorize_project_context(context, project_id)
return _share_snapshot_get_all_with_filters(
context, project_id=project_id,
filters=filters, sort_key=sort_key, sort_dir=sort_dir,
)
context, project_id=project_id, filters=filters, limit=limit,
offset=offset, sort_key=sort_key, sort_dir=sort_dir)
@require_context

View File

@ -1907,8 +1907,8 @@ class API(base.Base):
policy.check_policy(context, 'share_snapshot', 'get_snapshot')
return self.db.share_snapshot_get(context, snapshot_id)
def get_all_snapshots(self, context, search_opts=None,
sort_key='share_id', sort_dir='desc'):
def get_all_snapshots(self, context, search_opts=None, limit=None,
offset=None, sort_key='share_id', sort_dir='desc'):
policy.check_policy(context, 'share_snapshot', 'get_all_snapshots')
search_opts = search_opts or {}
@ -1925,29 +1925,16 @@ class API(base.Base):
"'%(v)s'.") % {'k': k, 'v': string_args[k]}
raise exception.InvalidInput(reason=msg)
if (context.is_admin and all_tenants):
if context.is_admin and all_tenants:
snapshots = self.db.share_snapshot_get_all(
context, filters=search_opts,
context, filters=search_opts, limit=limit, offset=offset,
sort_key=sort_key, sort_dir=sort_dir)
else:
snapshots = self.db.share_snapshot_get_all_by_project(
context, context.project_id, filters=search_opts,
sort_key=sort_key, sort_dir=sort_dir)
limit=limit, offset=offset, sort_key=sort_key,
sort_dir=sort_dir)
# Remove key 'usage' if provided
search_opts.pop('usage', None)
if search_opts:
results = []
not_found = object()
for snapshot in snapshots:
if (all(snapshot.get(k, not_found) == v or
(v in snapshot.get(k.rstrip('~'))
if k.endswith('~') and
snapshot.get(k.rstrip('~')) else ())
for k, v in search_opts.items())):
results.append(snapshot)
snapshots = results
return snapshots
def get_latest_snapshot_for_share(self, context, share_id):

View File

@ -191,6 +191,7 @@ def stub_snapshot_delete(self, context, *args, **param):
def stub_snapshot_get_all_by_project(self, context, search_opts=None,
limit=None, offset=None,
sort_key=None, sort_dir=None):
return [stub_snapshot_get(self, context, 2)]

View File

@ -184,7 +184,7 @@ class ShareSnapshotAPITest(test.TestCase):
url = url + '&' + k + '=' + v
req = fakes.HTTPRequest.blank(url, use_admin_context=use_admin_context)
snapshots = [
db_snapshots = [
{'id': 'id1', 'display_name': 'n1',
'status': 'fake_status', 'share_id': 'fake_share_id'},
{'id': 'id2', 'display_name': 'n2',
@ -192,6 +192,7 @@ class ShareSnapshotAPITest(test.TestCase):
{'id': 'id3', 'display_name': 'n3',
'status': 'fake_status', 'share_id': 'fake_share_id'},
]
snapshots = [db_snapshots[1]]
self.mock_object(share_api.API, 'get_all_snapshots',
mock.Mock(return_value=snapshots))
@ -206,14 +207,16 @@ class ShareSnapshotAPITest(test.TestCase):
search_opts_expected.update({'fake_key': 'fake_value'})
share_api.API.get_all_snapshots.assert_called_once_with(
req.environ['manila.context'],
limit=int(search_opts['limit']),
offset=int(search_opts['offset']),
sort_key=search_opts['sort_key'],
sort_dir=search_opts['sort_dir'],
search_opts=search_opts_expected,
)
self.assertEqual(1, len(result['snapshots']))
self.assertEqual(snapshots[1]['id'], result['snapshots'][0]['id'])
self.assertEqual(snapshots[0]['id'], result['snapshots'][0]['id'])
self.assertEqual(
snapshots[1]['display_name'], result['snapshots'][0]['name'])
snapshots[0]['display_name'], result['snapshots'][0]['name'])
def test_snapshot_list_summary_with_search_opts_by_non_admin(self):
self._snapshot_list_summary_with_search_opts(use_admin_context=False)
@ -229,7 +232,7 @@ class ShareSnapshotAPITest(test.TestCase):
url = url + '&' + k + '=' + v
req = fakes.HTTPRequest.blank(url, use_admin_context=use_admin_context)
snapshots = [
db_snapshots = [
{
'id': 'id1',
'display_name': 'n1',
@ -252,6 +255,7 @@ class ShareSnapshotAPITest(test.TestCase):
'share_id': 'fake_share_id',
},
]
snapshots = [db_snapshots[1]]
self.mock_object(share_api.API, 'get_all_snapshots',
mock.Mock(return_value=snapshots))
@ -267,18 +271,20 @@ class ShareSnapshotAPITest(test.TestCase):
search_opts_expected.update({'fake_key': 'fake_value'})
share_api.API.get_all_snapshots.assert_called_once_with(
req.environ['manila.context'],
limit=int(search_opts['limit']),
offset=int(search_opts['offset']),
sort_key=search_opts['sort_key'],
sort_dir=search_opts['sort_dir'],
search_opts=search_opts_expected,
)
self.assertEqual(1, len(result['snapshots']))
self.assertEqual(snapshots[1]['id'], result['snapshots'][0]['id'])
self.assertEqual(snapshots[0]['id'], result['snapshots'][0]['id'])
self.assertEqual(
snapshots[1]['display_name'], result['snapshots'][0]['name'])
snapshots[0]['display_name'], result['snapshots'][0]['name'])
self.assertEqual(
snapshots[1]['status'], result['snapshots'][0]['status'])
snapshots[0]['status'], result['snapshots'][0]['status'])
self.assertEqual(
snapshots[1]['share_id'], result['snapshots'][0]['share_id'])
snapshots[0]['share_id'], result['snapshots'][0]['share_id'])
def test_snapshot_list_detail_with_search_opts_by_non_admin(self):
self._snapshot_list_detail_with_search_opts(use_admin_context=False)
@ -296,14 +302,6 @@ class ShareSnapshotAPITest(test.TestCase):
def test_snapshot_list_status_none(self):
snapshots = [
{
'id': 2,
'share_id': 'fakeshareid',
'size': 1,
'status': 'fakesnapstatus',
'name': 'displaysnapname',
'description': 'displaysnapdesc',
},
{
'id': 3,
'share_id': 'fakeshareid',

View File

@ -202,11 +202,12 @@ class ShareSnapshotAPITest(test.TestCase):
req = fakes.HTTPRequest.blank(
url, use_admin_context=use_admin_context, version=version)
snapshots = [
db_snapshots = [
{'id': 'id1', 'display_name': 'n1', 'status': 'fake_status', },
{'id': 'id2', 'display_name': 'n2', 'status': 'fake_status', },
{'id': 'id3', 'display_name': 'n3', 'status': 'fake_status', },
]
snapshots = [db_snapshots[1]]
self.mock_object(share_api.API, 'get_all_snapshots',
mock.Mock(return_value=snapshots))
@ -225,14 +226,16 @@ class ShareSnapshotAPITest(test.TestCase):
search_opts_expected.update({'fake_key': 'fake_value'})
share_api.API.get_all_snapshots.assert_called_once_with(
req.environ['manila.context'],
limit=int(search_opts['limit']),
offset=int(search_opts['offset']),
sort_key=search_opts['sort_key'],
sort_dir=search_opts['sort_dir'],
search_opts=search_opts_expected,
)
self.assertEqual(1, len(result['snapshots']))
self.assertEqual(snapshots[1]['id'], result['snapshots'][0]['id'])
self.assertEqual(snapshots[0]['id'], result['snapshots'][0]['id'])
self.assertEqual(
snapshots[1]['display_name'], result['snapshots'][0]['name'])
snapshots[0]['display_name'], result['snapshots'][0]['name'])
@ddt.data({'version': '2.35', 'use_admin_context': True},
{'version': '2.36', 'use_admin_context': True},
@ -252,7 +255,7 @@ class ShareSnapshotAPITest(test.TestCase):
url = url + '&' + k + '=' + v
req = fakes.HTTPRequest.blank(url, use_admin_context=use_admin_context)
snapshots = [
db_snapshots = [
{
'id': 'id1',
'display_name': 'n1',
@ -273,6 +276,7 @@ class ShareSnapshotAPITest(test.TestCase):
'aggregate_status': 'fake_status',
},
]
snapshots = [db_snapshots[1]]
self.mock_object(share_api.API, 'get_all_snapshots',
mock.Mock(return_value=snapshots))
@ -288,18 +292,20 @@ class ShareSnapshotAPITest(test.TestCase):
search_opts_expected.update({'fake_key': 'fake_value'})
share_api.API.get_all_snapshots.assert_called_once_with(
req.environ['manila.context'],
limit=int(search_opts['limit']),
offset=int(search_opts['offset']),
sort_key=search_opts['sort_key'],
sort_dir=search_opts['sort_dir'],
search_opts=search_opts_expected,
)
self.assertEqual(1, len(result['snapshots']))
self.assertEqual(snapshots[1]['id'], result['snapshots'][0]['id'])
self.assertEqual(snapshots[0]['id'], result['snapshots'][0]['id'])
self.assertEqual(
snapshots[1]['display_name'], result['snapshots'][0]['name'])
snapshots[0]['display_name'], result['snapshots'][0]['name'])
self.assertEqual(
snapshots[1]['aggregate_status'], result['snapshots'][0]['status'])
snapshots[0]['aggregate_status'], result['snapshots'][0]['status'])
self.assertEqual(
snapshots[1]['share_id'], result['snapshots'][0]['share_id'])
snapshots[0]['share_id'], result['snapshots'][0]['share_id'])
def test_snapshot_list_detail_with_search_opts_by_non_admin(self):
self._snapshot_list_detail_with_search_opts(use_admin_context=False)

View File

@ -1539,6 +1539,20 @@ class ShareSnapshotDatabaseAPITestCase(test.TestCase):
self.assertEqual(1, len(actual_result.instances))
self.assertSubDictMatch(values, actual_result.to_dict())
def test_share_snapshot_get_all_with_filters_some(self):
expected_status = constants.STATUS_AVAILABLE
filters = {
'status': expected_status
}
snapshots = db_api.share_snapshot_get_all(
self.ctxt, filters=filters)
for snapshot in snapshots:
self.assertEqual('fake_snapshot_id_2', snapshot['id'])
self.assertEqual(snapshot['status'], filters['status'])
self.assertEqual(1, len(snapshots))
def test_share_snapshot_get_latest_for_share(self):
share = db_utils.create_share(size=1)

View File

@ -2516,7 +2516,8 @@ class ShareAPITestCase(test.TestCase):
share_api.policy.check_policy.assert_called_once_with(
ctx, 'share_snapshot', 'get_all_snapshots')
db_api.share_snapshot_get_all_by_project.assert_called_once_with(
ctx, 'fakepid', sort_dir='desc', sort_key='share_id', filters={})
ctx, 'fakepid', limit=None, offset=None, sort_dir='desc',
sort_key='share_id', filters={})
@mock.patch.object(db_api, 'share_snapshot_get_all', mock.Mock())
def test_get_all_snapshots_admin_all_tenants(self):
@ -2525,7 +2526,8 @@ class ShareAPITestCase(test.TestCase):
share_api.policy.check_policy.assert_called_once_with(
self.context, 'share_snapshot', 'get_all_snapshots')
db_api.share_snapshot_get_all.assert_called_once_with(
self.context, sort_dir='desc', sort_key='share_id', filters={})
self.context, limit=None, offset=None, sort_dir='desc',
sort_key='share_id', filters={})
@mock.patch.object(db_api, 'share_snapshot_get_all_by_project',
mock.Mock())
@ -2535,7 +2537,8 @@ class ShareAPITestCase(test.TestCase):
share_api.policy.check_policy.assert_called_once_with(
ctx, 'share_snapshot', 'get_all_snapshots')
db_api.share_snapshot_get_all_by_project.assert_called_once_with(
ctx, 'fakepid', sort_dir='desc', sort_key='share_id', filters={})
ctx, 'fakepid', limit=None, offset=None, sort_dir='desc',
sort_key='share_id', filters={})
def test_get_all_snapshots_not_admin_search_opts(self):
search_opts = {'size': 'fakesize'}
@ -2546,28 +2549,34 @@ class ShareAPITestCase(test.TestCase):
result = self.api.get_all_snapshots(ctx, search_opts)
self.assertEqual([search_opts], result)
self.assertEqual(fake_objs, result)
share_api.policy.check_policy.assert_called_once_with(
ctx, 'share_snapshot', 'get_all_snapshots')
db_api.share_snapshot_get_all_by_project.assert_called_once_with(
ctx, 'fakepid', sort_dir='desc', sort_key='share_id',
filters=search_opts)
ctx, 'fakepid', limit=None, offset=None, sort_dir='desc',
sort_key='share_id', filters=search_opts)
@ddt.data(({'name': 'fo'}, 0), ({'description': 'd'}, 0),
({'name': 'foo', 'description': 'd'}, 0),
({'name': 'foo'}, 1), ({'description': 'ds'}, 1),
({'name~': 'foo', 'description~': 'ds'}, 2),
({'name': 'foo', 'description~': 'ds'}, 1),
({'name~': 'foo', 'description': 'ds'}, 1))
@ddt.data(({'name': 'fo'}, 0, []), ({'description': 'd'}, 0, []),
({'name': 'foo', 'description': 'd'}, 0, []),
({'name': 'foo'}, 1, [{'name': 'foo', 'description': 'ds'}]),
({'description': 'ds'}, 1, [{'name': 'foo',
'description': 'ds'}]),
({'name~': 'foo', 'description~': 'ds'}, 2,
[{'name': 'foo', 'description': 'ds'},
{'name': 'foo1', 'description': 'ds1'}]),
({'name': 'foo', 'description~': 'ds'}, 1,
[{'name': 'foo', 'description': 'ds'}]),
({'name~': 'foo', 'description': 'ds'}, 1,
[{'name': 'foo', 'description': 'ds'}]))
@ddt.unpack
def test_get_all_snapshots_filter_by_name_and_description(
self, search_opts, get_snapshot_number):
self, search_opts, get_snapshot_number, res_snapshots):
fake_objs = [{'name': 'fo2', 'description': 'd2'},
{'name': 'foo', 'description': 'ds'},
{'name': 'foo1', 'description': 'ds1'}]
ctx = context.RequestContext('fakeuid', 'fakepid', is_admin=False)
self.mock_object(db_api, 'share_snapshot_get_all_by_project',
mock.Mock(return_value=fake_objs))
mock.Mock(return_value=res_snapshots))
result = self.api.get_all_snapshots(ctx, search_opts)
@ -2580,8 +2589,8 @@ class ShareAPITestCase(test.TestCase):
share_api.policy.check_policy.assert_called_once_with(
ctx, 'share_snapshot', 'get_all_snapshots')
db_api.share_snapshot_get_all_by_project.assert_called_once_with(
ctx, 'fakepid', sort_dir='desc', sort_key='share_id',
filters=search_opts)
ctx, 'fakepid', limit=None, offset=None, sort_dir='desc',
sort_key='share_id', filters=search_opts)
def test_get_all_snapshots_with_sorting_valid(self):
self.mock_object(
@ -2593,7 +2602,8 @@ class ShareAPITestCase(test.TestCase):
share_api.policy.check_policy.assert_called_once_with(
ctx, 'share_snapshot', 'get_all_snapshots')
db_api.share_snapshot_get_all_by_project.assert_called_once_with(
ctx, 'fake_pid_1', sort_dir='asc', sort_key='status', filters={})
ctx, 'fake_pid_1', limit=None, offset=None, sort_dir='asc',
sort_key='status', filters={})
self.assertEqual(_FAKE_LIST_OF_ALL_SNAPSHOTS[0], snapshots)
def test_get_all_snapshots_sort_key_invalid(self):

View File

@ -0,0 +1,7 @@
---
fixes:
- |
Fixed `bug #1881865 <https://bugs.launchpad.net/manila/+bug/1881865>`_
Added generic fuzzy matching logic to the database layer, This logic is
applied to query share snapshot list, This will greatly improve the speed
of paging fuzzy queries.