From 0f9eff30000a90bc3260768a76be64ae9c009a34 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 4 Mar 2016 16:10:01 -0500 Subject: [PATCH 1/3] Add release notes and documentation for python-323 --- CHANGELOG.rst | 18 ++++++++++++++++++ cassandra/cqlengine/query.py | 4 +++- docs/api/cassandra/cqlengine/query.rst | 6 ++++++ docs/cqlengine/queryset.rst | 9 ++++++++- 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e5f6059f..26dd8d96 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -45,6 +45,24 @@ Bug Fixes * cqlengine: class.MultipleObjectsReturned has DoesNotExist as base class (PYTHON-489) * cqlengine: Typo in cqlengine UserType __len__ breaks attribute assignment (PYTHON-502) + +Other +----- + +* cqlengine: a major improvement on queryset has been introduced. It + is a lot more efficient to iterage large datasets: the rows are + now fetched on demand using the driver pagination. + +* cqlengine: the queryset len() and count() behaviors have changed. It + now executes a "SELECT COUNT(*)" of the query rather than returning + the size of the internal result_cache (loaded rows). On large + queryset, you might want to avoid using them due to the performance + cost. Note that trying to access objects using list index/slicing + with negative indices also requires a count to be + executed. + + + 3.0.0 ===== November 24, 2015 diff --git a/cassandra/cqlengine/query.py b/cassandra/cqlengine/query.py index 085971f7..6c2a2a25 100644 --- a/cassandra/cqlengine/query.py +++ b/cassandra/cqlengine/query.py @@ -700,7 +700,9 @@ class AbstractQuerySet(object): def count(self): """ - Returns the number of rows matched by this query + Returns the number of rows matched by this query. 
+ + *Note: This function executes a SELECT COUNT() and has a performance cost on large datasets* """ if self._batch: raise CQLEngineException("Only inserts, updates, and deletes are available in batch mode") diff --git a/docs/api/cassandra/cqlengine/query.rst b/docs/api/cassandra/cqlengine/query.rst index bd45b3b9..62119e39 100644 --- a/docs/api/cassandra/cqlengine/query.rst +++ b/docs/api/cassandra/cqlengine/query.rst @@ -18,6 +18,12 @@ The methods here are used to filter, order, and constrain results. .. automethod:: count + .. method:: len(queryset) + + Returns the number of rows matched by this query. This function uses :meth:`~.cassandra.cqlengine.query.ModelQuerySet.count` internally. + + *Note: This function executes a SELECT COUNT() and has a performance cost on large datasets* + .. automethod:: distinct .. automethod:: filter diff --git a/docs/cqlengine/queryset.rst b/docs/cqlengine/queryset.rst index 0732a244..18287f92 100644 --- a/docs/cqlengine/queryset.rst +++ b/docs/cqlengine/queryset.rst @@ -79,6 +79,10 @@ Accessing objects in a QuerySet q[0] #returns the first result q[1] #returns the second result + .. note:: + + * CQL does not support specifying a start position in its queries. Therefore, accessing elements using array indexing will load every result up to the index value requested + * Using negative indices requires a "SELECT COUNT()" to be executed. This has a performance cost on large datasets. * list slicing .. code-block:: python @@ -87,7 +91,10 @@ Accessing objects in a QuerySet q[1:] #returns all results except the first q[1:9] #returns a slice of the results - *Note: CQL does not support specifying a start position in it's queries. Therefore, accessing elements using array indexing / slicing will load every result up to the index value requested* + .. note:: + + * CQL does not support specifying a start position in its queries. 
Therefore, accessing elements using array slicing will load every result up to the index value requested + * Using negative indices requires a "SELECT COUNT()" to be executed. This has a performance cost on large datasets. * calling :attr:`get() ` on the queryset .. code-block:: python From 11eadc945d8d98a595fa0879f6f01cfb9b333cb7 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 4 Mar 2016 16:41:48 -0500 Subject: [PATCH 2/3] Add some details in the docs for limit(None) and fetch_size() --- cassandra/cqlengine/query.py | 9 +++++++-- docs/api/cassandra/cqlengine/query.rst | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/cassandra/cqlengine/query.py b/cassandra/cqlengine/query.py index 6c2a2a25..4b2230c9 100644 --- a/cassandra/cqlengine/query.py +++ b/cassandra/cqlengine/query.py @@ -753,14 +753,19 @@ class AbstractQuerySet(object): def limit(self, v): """ - Limits the number of results returned by Cassandra. + Limits the number of results returned by Cassandra. Use *0* or *None* to disable. *Note that CQL's default limit is 10,000, so all queries without a limit set explicitly will have an implicit limit of 10,000* .. code-block:: python + # Fetch 100 users for user in User.objects().limit(100): print(user) + + # Fetch all users + for user in User.objects().limit(None): + print(user) """ if v is None: @@ -782,7 +787,7 @@ class AbstractQuerySet(object): """ Sets the number of rows that are fetched at a time. - *Note that driver's default fetch size is 5000. + *Note that driver's default fetch size is 5000.* .. code-block:: python diff --git a/docs/api/cassandra/cqlengine/query.rst b/docs/api/cassandra/cqlengine/query.rst index 62119e39..ad5489f2 100644 --- a/docs/api/cassandra/cqlengine/query.rst +++ b/docs/api/cassandra/cqlengine/query.rst @@ -32,6 +32,8 @@ The methods here are used to filter, order, and constrain results. .. automethod:: limit + .. automethod:: fetch_size + .. automethod:: if_not_exists .. 
automethod:: if_exists From eef82584c1a6929b7aed07ef6ce5025e3ded2d52 Mon Sep 17 00:00:00 2001 From: Alan Boudreault Date: Fri, 4 Mar 2016 17:01:16 -0500 Subject: [PATCH 3/3] typo --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 26dd8d96..012355ef 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -50,7 +50,7 @@ Other ----- * cqlengine: a major improvement on queryset has been introduced. It - is a lot more efficient to iterage large datasets: the rows are + is a lot more efficient to iterate large datasets: the rows are now fetched on demand using the driver pagination. * cqlengine: the queryset len() and count() behaviors have changed. It