Prefer links dicts for pagination

The current pagination logic makes call to look for additional items
at times even if it's not needed, as it is only able to infer when to
make an additional call or not if the user is requesting a limit value
in their call.

However, calls to list() without limit don't do the right thing. If the
user does list() with no limit then list will always have to make an
unneeded additional call because it has no limit to compare against.

This additional call became evident in the next patch when attempting to
use sdk primitives in the shade layer and for applications like nodepool
would result in unacceptably higher traffic.

Luckily, there are mechanisms available that can be checked to see if
pagination is needed. Nova, Cinder and other Nova-like projects provide
a top level links dict named {resources_key}_links. The api-wg
recommendation is for a top level dict to be provided named 'links'.
Glance provides a top-level key named 'next'. Swift returns the total
number of objects in a container in a header.

The first three can be looked for with no additional options but simply
by inspecting the payload. The Swift case requires that we add an optional
"pagination_key" option to Resource.

Finally, we can keep the existing logic that applies when a limit was
given and the number of resources returned exactly matches the limit as
a heuristic for making an additional call for resources that do not
otherwise return next links. Since most resources that support
pagination should reutnr a next link, the fallback logic should only
rarely be triggered. If anyone encounters a situation with an
unacceptable additional call cost that stems from a service that does
not return next links, we can use that as motivation to get the service
in question fixed.

Change-Id: I7a015f05fb43b63805a5aa9f7e266069d5e52438
This commit is contained in:
Monty Taylor
2017-12-31 11:58:00 -06:00
parent 46135f9b85
commit e068d891ed
4 changed files with 354 additions and 54 deletions

View File

@@ -216,6 +216,8 @@ class Resource(object):
resource_key = None
#: Plural form of key for resource.
resources_key = None
#: Key used for pagination links
pagination_key = None
#: The ID of this resource.
id = Body("id")
@@ -726,45 +728,111 @@ class Resource(object):
if not cls.allow_list:
raise exceptions.MethodNotSupported(cls, "list")
more_data = True
query_params = cls._query_mapping._transpose(params)
uri = cls.base_path % params
while more_data:
resp = session.get(uri,
headers={"Accept": "application/json"},
params=query_params)
resp = resp.json()
limit = query_params.get('limit')
# Track the total number of resources yielded so we can paginate
# swift objects
total_yielded = 0
while uri:
# Copy query_params due to weird mock unittest interactions
response = session.get(
uri,
headers={"Accept": "application/json"},
params=query_params.copy())
exceptions.raise_from_response(response)
data = response.json()
# Discard any existing pagination keys
query_params.pop('marker', None)
query_params.pop('limit', None)
if cls.resources_key:
resp = resp[cls.resources_key]
resources = data[cls.resources_key]
else:
resources = data
if not resp:
more_data = False
if not isinstance(resources, list):
resources = [resources]
# Keep track of how many items we've yielded. If we yielded
# less than our limit, we don't need to do an extra request
# to get back an empty data set, which acts as a sentinel.
# Keep track of how many items we've yielded. The server should
# handle this, but it's easy for us to as well.
yielded = 0
new_marker = None
for data in resp:
marker = None
for raw_resource in resources:
# Do not allow keys called "self" through. Glance chose
# to name a key "self", so we need to pop it out because
# we can't send it through cls.existing and into the
# Resource initializer. "self" is already the first
# argument and is practically a reserved word.
data.pop("self", None)
raw_resource.pop("self", None)
value = cls.existing(**data)
new_marker = value.id
yielded += 1
value = cls.existing(**raw_resource)
marker = value.id
yield value
yielded += 1
total_yielded += 1
if not paginated:
# If a limit was given by the user and we have not returned
# as many records as the limit, then it stands to reason that
# there are no more records to return and we don't need to do
# anything else.
if limit and yielded < limit:
return
if "limit" in query_params and yielded < query_params["limit"]:
if resources and paginated:
uri, next_params = cls._get_next_link(
uri, response, data, marker, limit, total_yielded)
query_params.update(next_params)
else:
return
query_params["limit"] = yielded
query_params["marker"] = new_marker
@classmethod
def _get_next_link(cls, uri, response, data, marker, limit, total_yielded):
next_link = None
params = {}
if isinstance(data, dict):
pagination_key = cls.pagination_key
if not pagination_key and 'links' in data:
# api-wg guidelines are for a links dict in the main body
pagination_key == 'links'
if not pagination_key and cls.resources_key:
# Nova has a {key}_links dict in the main body
pagination_key = '{key}_links'.format(key=cls.resources_key)
if pagination_key:
links = data.get(pagination_key, {})
for item in links:
if item.get('rel') == 'next' and 'href' in item:
next_link = item['href']
break
# Glance has a next field in the main body
next_link = next_link or data.get('next')
if not next_link and 'next' in response.links:
# RFC5988 specifies Link headers and requests parses them if they
# are there. We prefer link dicts in resource body, but if those
# aren't there and Link headers are, use them.
next_link = response.links['next']['uri']
# Swift provides a count of resources in a header and a list body
if not next_link and cls.pagination_key:
total_count = response.headers.get(cls.pagination_key)
if total_count:
total_count = int(total_count)
if total_count > total_yielded:
params['marker'] = marker
if limit:
params['limit'] = limit
next_link = uri
# If we still have no link, and limit was given and is non-zero,
# and the number of records yielded equals the limit, then the user
# is playing pagination ball so we should go ahead and try once more.
if not next_link and limit:
next_link = uri
params['marker'] = marker
params['limit'] = limit
return next_link, params
@classmethod
def _get_one_match(cls, name_or_id, results):

View File

@@ -65,6 +65,7 @@ class TestSample(testtools.TestCase):
sess = mock.Mock()
resp = mock.Mock()
resp.json = mock.Mock(return_value=[SAMPLE])
resp.status_code = 200
sess.get = mock.Mock(return_value=resp)
found = sample.Sample.list(sess, counter_name='name_of_meter')

View File

@@ -73,6 +73,7 @@ class TestFloatingIP(testtools.TestCase):
fake_response = mock.Mock()
body = {floating_ip.FloatingIP.resources_key: [data]}
fake_response.json = mock.Mock(return_value=body)
fake_response.status_code = 200
mock_session.get = mock.Mock(return_value=fake_response)
result = floating_ip.FloatingIP.find_available(mock_session)
@@ -88,6 +89,7 @@ class TestFloatingIP(testtools.TestCase):
fake_response = mock.Mock()
body = {floating_ip.FloatingIP.resources_key: []}
fake_response.json = mock.Mock(return_value=body)
fake_response.status_code = 200
mock_session.get = mock.Mock(return_value=fake_response)
self.assertIsNone(floating_ip.FloatingIP.find_available(mock_session))

View File

@@ -938,6 +938,7 @@ class TestResourceActions(base.TestCase):
class Test(resource2.Resource):
service = self.service_name
base_path = self.base_path
resources_key = 'resources'
allow_create = True
allow_get = True
allow_head = True
@@ -1096,7 +1097,9 @@ class TestResourceActions(base.TestCase):
# the generator. Wrap calls to self.sot.list in a `list`
# and then test the results as a list of responses.
def test_list_empty_response(self):
mock_response = FakeResponse([])
mock_response = mock.Mock()
mock_response.status_code = 200
mock_response.json.return_value = {"resources": []}
self.session.get.return_value = mock_response
@@ -1112,8 +1115,9 @@ class TestResourceActions(base.TestCase):
def test_list_one_page_response_paginated(self):
id_value = 1
mock_response = mock.Mock()
mock_response.json.side_effect = [[{"id": id_value}],
[]]
mock_response.status_code = 200
mock_response.links = {}
mock_response.json.return_value = {"resources": [{"id": id_value}]}
self.session.get.return_value = mock_response
@@ -1123,17 +1127,15 @@ class TestResourceActions(base.TestCase):
self.assertEqual(1, len(results))
# Look at the `params` argument to each of the get calls that
# were made.
self.session.get.call_args_list[0][1]["params"] = {}
self.session.get.call_args_list[1][1]["params"] = {"marker": id_value}
self.assertEqual(1, len(self.session.get.call_args_list))
self.assertEqual(id_value, results[0].id)
self.assertIsInstance(results[0], self.test_class)
def test_list_one_page_response_not_paginated(self):
id_value = 1
mock_response = mock.Mock()
mock_response.json.return_value = [{"id": id_value}]
mock_response.status_code = 200
mock_response.json.return_value = {"resources": [{"id": id_value}]}
self.session.get.return_value = mock_response
@@ -1156,6 +1158,7 @@ class TestResourceActions(base.TestCase):
id_value = 1
mock_response = mock.Mock()
mock_response.status_code = 200
mock_response.json.return_value = {key: [{"id": id_value}]}
self.session.get.return_value = mock_response
@@ -1173,11 +1176,85 @@ class TestResourceActions(base.TestCase):
self.assertEqual(id_value, results[0].id)
self.assertIsInstance(results[0], self.test_class)
def test_list_response_paginated_without_links(self):
ids = [1, 2]
mock_response = mock.Mock()
mock_response.status_code = 200
mock_response.links = {}
mock_response.json.return_value = {
"resources": [{"id": ids[0]}],
"resources_links": [{
"href": "https://example.com/next-url",
"rel": "next",
}]
}
mock_response2 = mock.Mock()
mock_response2.status_code = 200
mock_response2.links = {}
mock_response2.json.return_value = {
"resources": [{"id": ids[1]}],
}
self.session.get.side_effect = [mock_response, mock_response2]
results = list(self.sot.list(self.session, paginated=True))
self.assertEqual(2, len(results))
self.assertEqual(ids[0], results[0].id)
self.assertEqual(ids[1], results[1].id)
self.assertEqual(
mock.call('base_path',
headers={'Accept': 'application/json'}, params={}),
self.session.get.mock_calls[0])
self.assertEqual(
mock.call('https://example.com/next-url',
headers={'Accept': 'application/json'}, params={}),
self.session.get.mock_calls[1])
self.assertEqual(2, len(self.session.get.call_args_list))
self.assertIsInstance(results[0], self.test_class)
def test_list_response_paginated_with_links(self):
ids = [1, 2]
mock_response = mock.Mock()
mock_response.status_code = 200
mock_response.links = {}
mock_response.json.side_effect = [
{
"resources": [{"id": ids[0]}],
"resources_links": [{
"href": "https://example.com/next-url",
"rel": "next",
}]
}, {
"resources": [{"id": ids[1]}],
}]
self.session.get.return_value = mock_response
results = list(self.sot.list(self.session, paginated=True))
self.assertEqual(2, len(results))
self.assertEqual(ids[0], results[0].id)
self.assertEqual(ids[1], results[1].id)
self.assertEqual(
mock.call('base_path',
headers={'Accept': 'application/json'}, params={}),
self.session.get.mock_calls[0])
self.assertEqual(
mock.call('https://example.com/next-url',
headers={'Accept': 'application/json'}, params={}),
self.session.get.mock_calls[2])
self.assertEqual(2, len(self.session.get.call_args_list))
self.assertIsInstance(results[0], self.test_class)
def test_list_multi_page_response_not_paginated(self):
ids = [1, 2]
mock_response = mock.Mock()
mock_response.json.side_effect = [[{"id": ids[0]}],
[{"id": ids[1]}]]
mock_response.status_code = 200
mock_response.json.side_effect = [
{"resources": [{"id": ids[0]}]},
{"resources": [{"id": ids[1]}]},
]
self.session.get.return_value = mock_response
@@ -1194,10 +1271,16 @@ class TestResourceActions(base.TestCase):
uri_param = "uri param!"
mock_response = mock.Mock()
mock_response.json.side_effect = [[{"id": id}],
[]]
mock_response.status_code = 200
mock_response.links = {}
mock_response.json.return_value = {"resources": [{"id": id}]}
self.session.get.return_value = mock_response
mock_empty = mock.Mock()
mock_empty.status_code = 200
mock_empty.links = {}
mock_empty.json.return_value = {"resources": []}
self.session.get.side_effect = [mock_response, mock_empty]
class Test(self.test_class):
_query_mapping = resource2.QueryParameters(query_param=qp_name)
@@ -1217,19 +1300,33 @@ class TestResourceActions(base.TestCase):
Test.base_path % {"something": uri_param})
def test_list_multi_page_response_paginated(self):
# This tests our ability to stop making calls once
# we've received all of the data. However, this tests
# the case that we always receive full pages of data
# and then the signal that there is no more data - an empty list.
# In this case, we need to make one extra request beyond
# the end of data to ensure we've received it all.
ids = [1, 2]
resp1 = mock.Mock()
resp1.json.return_value = [{"id": ids[0]}]
resp1.status_code = 200
resp1.links = {}
resp1.json.return_value = {
"resources": [{"id": ids[0]}],
"resources_links": [{
"href": "https://example.com/next-url",
"rel": "next",
}],
}
resp2 = mock.Mock()
resp2.json.return_value = [{"id": ids[1]}]
resp2.status_code = 200
resp2.links = {}
resp2.json.return_value = {
"resources": [{"id": ids[1]}],
"resources_links": [{
"href": "https://example.com/next-url",
"rel": "next",
}],
}
resp3 = mock.Mock()
resp3.json.return_value = []
resp3.status_code = 200
resp3.links = {}
resp3.json.return_value = {
"resources": []
}
self.session.get.side_effect = [resp1, resp2, resp3]
@@ -1245,26 +1342,113 @@ class TestResourceActions(base.TestCase):
result1 = next(results)
self.assertEqual(result1.id, ids[1])
self.session.get.assert_called_with(
self.base_path,
'https://example.com/next-url',
headers={"Accept": "application/json"},
params={"limit": 1, "marker": 1})
params={})
self.assertRaises(StopIteration, next, results)
self.session.get.assert_called_with(
self.base_path,
'https://example.com/next-url',
headers={"Accept": "application/json"},
params={"limit": 1, "marker": 2})
params={})
def test_list_multi_page_early_termination(self):
# This tests our ability to be somewhat smart when evaluating
# the contents of the responses. When we receive a full page
# of data, we can be smart about terminating our responses
# once we see that we've received a page with less data than
# expected, saving one request.
# the contents of the responses. When we request a limit and
# receive less than that limit and there are no next links,
# we can be pretty sure there are no more pages.
ids = [1, 2]
resp1 = mock.Mock()
resp1.status_code = 200
resp1.json.return_value = {
"resources": [{"id": ids[0]}, {"id": ids[1]}],
}
self.session.get.return_value = resp1
results = self.sot.list(self.session, limit=3, paginated=True)
result0 = next(results)
self.assertEqual(result0.id, ids[0])
result1 = next(results)
self.assertEqual(result1.id, ids[1])
self.session.get.assert_called_with(
self.base_path,
headers={"Accept": "application/json"},
params={"limit": 3})
# Ensure we're done after those two items
self.assertRaises(StopIteration, next, results)
# Ensure we only made one calls to get this done
self.assertEqual(1, len(self.session.get.call_args_list))
def test_list_multi_page_inferred_additional(self):
# If we explicitly request a limit and we receive EXACTLY that
# amount of results and there is no next link, we make one additional
# call to check to see if there are more records and the service is
# just sad.
# NOTE(mordred) In a perfect world we would not do this. But it's 2018
# and I don't think anyone has any illusions that we live in a perfect
# world anymore.
ids = [1, 2, 3]
resp1 = mock.Mock()
resp1.status_code = 200
resp1.links = {}
resp1.json.return_value = {
"resources": [{"id": ids[0]}, {"id": ids[1]}],
}
resp2 = mock.Mock()
resp2.status_code = 200
resp2.links = {}
resp2.json.return_value = {"resources": [{"id": ids[2]}]}
self.session.get.side_effect = [resp1, resp2]
results = self.sot.list(self.session, limit=2, paginated=True)
# Get the first page's two items
result0 = next(results)
self.assertEqual(result0.id, ids[0])
result1 = next(results)
self.assertEqual(result1.id, ids[1])
self.session.get.assert_called_with(
self.base_path,
headers={"Accept": "application/json"},
params={"limit": 2})
result2 = next(results)
self.assertEqual(result2.id, ids[2])
self.session.get.assert_called_with(
self.base_path,
headers={"Accept": "application/json"},
params={'limit': 2, 'marker': 2})
# Ensure we're done after those three items
self.assertRaises(StopIteration, next, results)
# Ensure we only made two calls to get this done
self.assertEqual(2, len(self.session.get.call_args_list))
def test_list_multi_page_header_count(self):
class Test(self.test_class):
resources_key = None
pagination_key = 'X-Container-Object-Count'
self.sot = Test()
# Swift returns a total number of objects in a header and we compare
# that against the total number returned to know if we need to fetch
# more objects.
ids = [1, 2, 3]
resp1 = mock.Mock()
resp1.status_code = 200
resp1.links = {}
resp1.headers = {'X-Container-Object-Count': 3}
resp1.json.return_value = [{"id": ids[0]}, {"id": ids[1]}]
resp2 = mock.Mock()
resp2.status_code = 200
resp2.links = {}
resp2.headers = {'X-Container-Object-Count': 3}
resp2.json.return_value = [{"id": ids[2]}]
self.session.get.side_effect = [resp1, resp2]
@@ -1281,13 +1465,58 @@ class TestResourceActions(base.TestCase):
headers={"Accept": "application/json"},
params={})
# Second page only has one item
result2 = next(results)
self.assertEqual(result2.id, ids[2])
self.session.get.assert_called_with(
self.base_path,
headers={"Accept": "application/json"},
params={"limit": 2, "marker": 2})
params={'marker': 2})
# Ensure we're done after those three items
self.assertRaises(StopIteration, next, results)
# Ensure we only made two calls to get this done
self.assertEqual(2, len(self.session.get.call_args_list))
def test_list_multi_page_link_header(self):
# Swift returns a total number of objects in a header and we compare
# that against the total number returned to know if we need to fetch
# more objects.
ids = [1, 2, 3]
resp1 = mock.Mock()
resp1.status_code = 200
resp1.links = {
'next': {'uri': 'https://example.com/next-url', 'rel': 'next'}}
resp1.headers = {}
resp1.json.return_value = {
"resources": [{"id": ids[0]}, {"id": ids[1]}],
}
resp2 = mock.Mock()
resp2.status_code = 200
resp2.links = {}
resp2.headers = {}
resp2.json.return_value = {"resources": [{"id": ids[2]}]}
self.session.get.side_effect = [resp1, resp2]
results = self.sot.list(self.session, paginated=True)
# Get the first page's two items
result0 = next(results)
self.assertEqual(result0.id, ids[0])
result1 = next(results)
self.assertEqual(result1.id, ids[1])
self.session.get.assert_called_with(
self.base_path,
headers={"Accept": "application/json"},
params={})
result2 = next(results)
self.assertEqual(result2.id, ids[2])
self.session.get.assert_called_with(
'https://example.com/next-url',
headers={"Accept": "application/json"},
params={})
# Ensure we're done after those three items
self.assertRaises(StopIteration, next, results)