Calling distinct on the _id field of a collection is slow

To optimize the code, the distinct method should be avoided, especially against the _id field, whose values are already unique. Sorting should also be done on the server side so that results can be consumed lazily through a generator instead of using up memory for large data sets.

Change-Id: Ib417f73e2238a5d9865a2a1a9990e891b0f7a24c
fixes: bug #1204607
fixes: bug #1204609
2013-07-25 10:33:55 -04:00
parent a4b1b54a48
commit 4514eb8767
1 changed file with 8 additions and 2 deletions
--- a/ceilometer/storage/impl_mongodb.py
+++ b/ceilometer/storage/impl_mongodb.py
@@ -430,7 +430,10 @@ class Connection(base.Connection):
        q = {}
        if source is not None:
            q['source'] = source
-        return sorted(self.db.user.find(q).distinct('_id'))
+
+        return (doc['_id'] for doc in
+                self.db.user.find(q, fields=['_id'],
+                                  sort=[('_id', pymongo.ASCENDING)]))

    def get_projects(self, source=None):
        """Return an iterable of project id strings.
@@ -440,7 +443,10 @@ class Connection(base.Connection):
        q = {}
        if source is not None:
            q['source'] = source
-        return sorted(self.db.project.find(q).distinct('_id'))
+
+        return (doc['_id'] for doc in
+                self.db.project.find(q, fields=['_id'],
+                                     sort=[('_id', pymongo.ASCENDING)]))

    def get_resources(self, user=None, project=None, source=None,
                      start_timestamp=None, start_timestamp_op=None,