Calling distinct on the _id field of a collection is slow

To optimize the code, the distinct method should be avoided, especially
against the _id field, whose values are already unique. Sorting should
also be done on the server side so that the results can be returned
through a generator, which avoids using up memory for large data sets.

Change-Id: Ib417f73e2238a5d9865a2a1a9990e891b0f7a24c
fixes: bug #1204607
fixes: bug #1204609
This commit is contained in:
Tong Li
2013-07-25 10:33:55 -04:00
parent a4b1b54a48
commit 4514eb8767

View File

@@ -430,7 +430,10 @@ class Connection(base.Connection):
q = {}
if source is not None:
q['source'] = source
return sorted(self.db.user.find(q).distinct('_id'))
return (doc['_id'] for doc in
self.db.user.find(q, fields=['_id'],
sort=[('_id', pymongo.ASCENDING)]))
def get_projects(self, source=None):
"""Return an iterable of project id strings.
@@ -440,7 +443,10 @@ class Connection(base.Connection):
q = {}
if source is not None:
q['source'] = source
return sorted(self.db.project.find(q).distinct('_id'))
return (doc['_id'] for doc in
self.db.project.find(q, fields=['_id'],
sort=[('_id', pymongo.ASCENDING)]))
def get_resources(self, user=None, project=None, source=None,
start_timestamp=None, start_timestamp_op=None,