Allow batch_size=None in Table.scan() to avoid filter incompatibilities
Allow None as a valid value for the batch_size argument to Table.scan(), since HBase does not support specifying a batch size when some scanner filters are used. Fixes issue #54.
This commit is contained in:
5
NEWS.rst
5
NEWS.rst
@@ -14,6 +14,11 @@ Release date: *not yet released*
|
||||
to :py:meth:`Table.scan` (`issue #39
|
||||
<https://github.com/wbolster/happybase/issues/39>`_).
|
||||
|
||||
* Allow `None` as a valid value for the `batch_size` argument to
|
||||
:py:meth:`Table.scan`, since HBase does not support specifying a batch size
|
||||
when some scanner filters are used (`issue #54
|
||||
<https://github.com/wbolster/happybase/issues/54>`_).
|
||||
|
||||
|
||||
HappyBase 0.7
|
||||
-------------
|
||||
|
||||
@@ -254,6 +254,15 @@ class Table(object):
|
||||
this to a low value (or even 1) if your data is large, since a low
|
||||
batch size results in added round-trips to the server.
|
||||
|
||||
.. warning::
|
||||
|
||||
Not all HBase filters can be used in combination with a batch
|
||||
size. Explicitly specify `None` for the `batch_size` argument
|
||||
in those cases to override the default value. Failure to do
|
||||
so can result in hard-to-debug errors (not HappyBase's
|
||||
fault), such as a non-responsive connection. The HBase logs
|
||||
may contain more useful information in these situations.
|
||||
|
||||
**Compatibility notes:**
|
||||
|
||||
* The `filter` argument is only available when using HBase 0.92
|
||||
@@ -280,11 +289,11 @@ class Table(object):
|
||||
:return: generator yielding the rows matching the scan
|
||||
:rtype: iterable of `(row_key, row_data)` tuples
|
||||
"""
|
||||
if batch_size < 1:
|
||||
raise ValueError("'batch_size' must be >= 1")
|
||||
if batch_size is not None and batch_size < 1:
|
||||
raise ValueError("'batch_size' must be >= 1 (or None)")
|
||||
|
||||
if limit is not None and limit < 1:
|
||||
raise ValueError("'limit' must be >= 1")
|
||||
raise ValueError("'limit' must be >= 1 (or None)")
|
||||
|
||||
if sorted_columns and self.connection.compat < '0.96':
|
||||
raise NotImplementedError(
|
||||
@@ -349,16 +358,16 @@ class Table(object):
|
||||
n_returned = n_fetched = 0
|
||||
try:
|
||||
while True:
|
||||
if limit is None:
|
||||
if batch_size is None:
|
||||
how_many = 1
|
||||
else:
|
||||
how_many = batch_size
|
||||
else:
|
||||
how_many = min(batch_size, limit - n_returned)
|
||||
|
||||
if how_many == 1:
|
||||
items = self.connection.client.scannerGet(scan_id)
|
||||
else:
|
||||
items = self.connection.client.scannerGetList(
|
||||
scan_id, how_many)
|
||||
if limit is not None:
|
||||
how_many = min(how_many, limit - n_returned)
|
||||
|
||||
items = self.connection.client.scannerGetList(
|
||||
scan_id, how_many)
|
||||
|
||||
n_fetched += len(items)
|
||||
|
||||
|
||||
@@ -357,7 +357,7 @@ def test_scan():
|
||||
list(table.scan(row_prefix='foobar', row_start='xyz'))
|
||||
|
||||
with assert_raises(ValueError):
|
||||
list(table.scan(batch_size=None))
|
||||
list(table.scan(batch_size=0))
|
||||
|
||||
if connection.compat == '0.90':
|
||||
with assert_raises(NotImplementedError):
|
||||
@@ -446,6 +446,13 @@ def test_scan_sorting():
|
||||
row.items())
|
||||
|
||||
|
||||
def test_scan_filter_and_batch_size():
|
||||
# See issue #54
|
||||
filter = "SingleColumnValueFilter ('cf1', 'qual1', =, 'binary:val1')"
|
||||
for k, v in table.scan(filter=filter, batch_size=None):
|
||||
print v
|
||||
|
||||
|
||||
def test_delete():
|
||||
row_key = 'row-test-delete'
|
||||
data = {'cf1:col1': 'v1',
|
||||
|
||||
Reference in New Issue
Block a user