From e068d891ed1fbaf2b7ed01fc6a4eba29385f3ba7 Mon Sep 17 00:00:00 2001 From: Monty Taylor Date: Sun, 31 Dec 2017 11:58:00 -0600 Subject: [PATCH] Prefer links dicts for pagination The current pagination logic makes call to look for additional items at times even if it's not needed, as it is only able to infer when to make an additional call or not if the user is requesting a limit value in their call. However, calls to list() without limit don't do the right thing. If the user does list() with no limit then list will always have to make an unneeded additional call because it has no limit to compare against. This additional call became evident in the next patch when attempting to use sdk primitives in the shade layer and for applications like nodepool would result in unacceptably higher traffic. Luckily, there are mechanisms available that can be checked to see if pagination is needed. Nova, Cinder and other Nova-like projects provide a top level links dict named {resources_key}_links. The api-wg recommendation is for a top level dict to be provided named 'links'. Glance provides a top-level key named 'next'. Swift returns the total number of objects in a container in a header. The first three can be looked for with no additional options but simply by inspecting the payload. The Swift case requires that we add an optional "pagination_key" option to Resource. Finally, we can keep the existing logic that applies when a limit was given and the number of resources returned exactly matches the limit as a heuristic for making an additional call for resources that do not otherwise return next links. Since most resources that support pagination should reutnr a next link, the fallback logic should only rarely be triggered. If anyone encounters a situation with an unacceptable additional call cost that stems from a service that does not return next links, we can use that as motivation to get the service in question fixed. Change-Id: I7a015f05fb43b63805a5aa9f7e266069d5e52438 --- openstack/resource2.py | 112 +++++-- openstack/tests/unit/meter/v2/test_sample.py | 1 + .../tests/unit/network/v2/test_floating_ip.py | 2 + openstack/tests/unit/test_resource2.py | 293 ++++++++++++++++-- 4 files changed, 354 insertions(+), 54 deletions(-) diff --git a/openstack/resource2.py b/openstack/resource2.py index 258e196cf..716b31604 100644 --- a/openstack/resource2.py +++ b/openstack/resource2.py @@ -216,6 +216,8 @@ class Resource(object): resource_key = None #: Plural form of key for resource. resources_key = None + #: Key used for pagination links + pagination_key = None #: The ID of this resource. id = Body("id") @@ -726,45 +728,111 @@ class Resource(object): if not cls.allow_list: raise exceptions.MethodNotSupported(cls, "list") - more_data = True query_params = cls._query_mapping._transpose(params) uri = cls.base_path % params - while more_data: - resp = session.get(uri, - headers={"Accept": "application/json"}, - params=query_params) - resp = resp.json() + limit = query_params.get('limit') + + # Track the total number of resources yielded so we can paginate + # swift objects + total_yielded = 0 + while uri: + # Copy query_params due to weird mock unittest interactions + response = session.get( + uri, + headers={"Accept": "application/json"}, + params=query_params.copy()) + exceptions.raise_from_response(response) + data = response.json() + + # Discard any existing pagination keys + query_params.pop('marker', None) + query_params.pop('limit', None) + if cls.resources_key: - resp = resp[cls.resources_key] + resources = data[cls.resources_key] + else: + resources = data - if not resp: - more_data = False + if not isinstance(resources, list): + resources = [resources] - # Keep track of how many items we've yielded. If we yielded - # less than our limit, we don't need to do an extra request - # to get back an empty data set, which acts as a sentinel. + # Keep track of how many items we've yielded. The server should + # handle this, but it's easy for us to as well. yielded = 0 - new_marker = None - for data in resp: + marker = None + for raw_resource in resources: # Do not allow keys called "self" through. Glance chose # to name a key "self", so we need to pop it out because # we can't send it through cls.existing and into the # Resource initializer. "self" is already the first # argument and is practically a reserved word. - data.pop("self", None) + raw_resource.pop("self", None) - value = cls.existing(**data) - new_marker = value.id - yielded += 1 + value = cls.existing(**raw_resource) + marker = value.id yield value + yielded += 1 + total_yielded += 1 - if not paginated: + # If a limit was given by the user and we have not returned + # as many records as the limit, then it stands to reason that + # there are no more records to return and we don't need to do + # anything else. + if limit and yielded < limit: return - if "limit" in query_params and yielded < query_params["limit"]: + + if resources and paginated: + uri, next_params = cls._get_next_link( + uri, response, data, marker, limit, total_yielded) + query_params.update(next_params) + else: return - query_params["limit"] = yielded - query_params["marker"] = new_marker + + @classmethod + def _get_next_link(cls, uri, response, data, marker, limit, total_yielded): + next_link = None + params = {} + if isinstance(data, dict): + pagination_key = cls.pagination_key + + if not pagination_key and 'links' in data: + # api-wg guidelines are for a links dict in the main body + pagination_key == 'links' + if not pagination_key and cls.resources_key: + # Nova has a {key}_links dict in the main body + pagination_key = '{key}_links'.format(key=cls.resources_key) + if pagination_key: + links = data.get(pagination_key, {}) + for item in links: + if item.get('rel') == 'next' and 'href' in item: + next_link = item['href'] + break + # Glance has a next field in the main body + next_link = next_link or data.get('next') + if not next_link and 'next' in response.links: + # RFC5988 specifies Link headers and requests parses them if they + # are there. We prefer link dicts in resource body, but if those + # aren't there and Link headers are, use them. + next_link = response.links['next']['uri'] + # Swift provides a count of resources in a header and a list body + if not next_link and cls.pagination_key: + total_count = response.headers.get(cls.pagination_key) + if total_count: + total_count = int(total_count) + if total_count > total_yielded: + params['marker'] = marker + if limit: + params['limit'] = limit + next_link = uri + # If we still have no link, and limit was given and is non-zero, + # and the number of records yielded equals the limit, then the user + # is playing pagination ball so we should go ahead and try once more. + if not next_link and limit: + next_link = uri + params['marker'] = marker + params['limit'] = limit + return next_link, params @classmethod def _get_one_match(cls, name_or_id, results): diff --git a/openstack/tests/unit/meter/v2/test_sample.py b/openstack/tests/unit/meter/v2/test_sample.py index ee2588512..8f7faf729 100644 --- a/openstack/tests/unit/meter/v2/test_sample.py +++ b/openstack/tests/unit/meter/v2/test_sample.py @@ -65,6 +65,7 @@ class TestSample(testtools.TestCase): sess = mock.Mock() resp = mock.Mock() resp.json = mock.Mock(return_value=[SAMPLE]) + resp.status_code = 200 sess.get = mock.Mock(return_value=resp) found = sample.Sample.list(sess, counter_name='name_of_meter') diff --git a/openstack/tests/unit/network/v2/test_floating_ip.py b/openstack/tests/unit/network/v2/test_floating_ip.py index 0005f13d7..0ae513989 100644 --- a/openstack/tests/unit/network/v2/test_floating_ip.py +++ b/openstack/tests/unit/network/v2/test_floating_ip.py @@ -73,6 +73,7 @@ class TestFloatingIP(testtools.TestCase): fake_response = mock.Mock() body = {floating_ip.FloatingIP.resources_key: [data]} fake_response.json = mock.Mock(return_value=body) + fake_response.status_code = 200 mock_session.get = mock.Mock(return_value=fake_response) result = floating_ip.FloatingIP.find_available(mock_session) @@ -88,6 +89,7 @@ class TestFloatingIP(testtools.TestCase): fake_response = mock.Mock() body = {floating_ip.FloatingIP.resources_key: []} fake_response.json = mock.Mock(return_value=body) + fake_response.status_code = 200 mock_session.get = mock.Mock(return_value=fake_response) self.assertIsNone(floating_ip.FloatingIP.find_available(mock_session)) diff --git a/openstack/tests/unit/test_resource2.py b/openstack/tests/unit/test_resource2.py index fdfc449e9..a69b88dc0 100644 --- a/openstack/tests/unit/test_resource2.py +++ b/openstack/tests/unit/test_resource2.py @@ -938,6 +938,7 @@ class TestResourceActions(base.TestCase): class Test(resource2.Resource): service = self.service_name base_path = self.base_path + resources_key = 'resources' allow_create = True allow_get = True allow_head = True @@ -1096,7 +1097,9 @@ class TestResourceActions(base.TestCase): # the generator. Wrap calls to self.sot.list in a `list` # and then test the results as a list of responses. def test_list_empty_response(self): - mock_response = FakeResponse([]) + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"resources": []} self.session.get.return_value = mock_response @@ -1112,8 +1115,9 @@ class TestResourceActions(base.TestCase): def test_list_one_page_response_paginated(self): id_value = 1 mock_response = mock.Mock() - mock_response.json.side_effect = [[{"id": id_value}], - []] + mock_response.status_code = 200 + mock_response.links = {} + mock_response.json.return_value = {"resources": [{"id": id_value}]} self.session.get.return_value = mock_response @@ -1123,17 +1127,15 @@ class TestResourceActions(base.TestCase): self.assertEqual(1, len(results)) - # Look at the `params` argument to each of the get calls that - # were made. - self.session.get.call_args_list[0][1]["params"] = {} - self.session.get.call_args_list[1][1]["params"] = {"marker": id_value} + self.assertEqual(1, len(self.session.get.call_args_list)) self.assertEqual(id_value, results[0].id) self.assertIsInstance(results[0], self.test_class) def test_list_one_page_response_not_paginated(self): id_value = 1 mock_response = mock.Mock() - mock_response.json.return_value = [{"id": id_value}] + mock_response.status_code = 200 + mock_response.json.return_value = {"resources": [{"id": id_value}]} self.session.get.return_value = mock_response @@ -1156,6 +1158,7 @@ class TestResourceActions(base.TestCase): id_value = 1 mock_response = mock.Mock() + mock_response.status_code = 200 mock_response.json.return_value = {key: [{"id": id_value}]} self.session.get.return_value = mock_response @@ -1173,11 +1176,85 @@ class TestResourceActions(base.TestCase): self.assertEqual(id_value, results[0].id) self.assertIsInstance(results[0], self.test_class) + def test_list_response_paginated_without_links(self): + ids = [1, 2] + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.links = {} + mock_response.json.return_value = { + "resources": [{"id": ids[0]}], + "resources_links": [{ + "href": "https://example.com/next-url", + "rel": "next", + }] + } + mock_response2 = mock.Mock() + mock_response2.status_code = 200 + mock_response2.links = {} + mock_response2.json.return_value = { + "resources": [{"id": ids[1]}], + } + + self.session.get.side_effect = [mock_response, mock_response2] + + results = list(self.sot.list(self.session, paginated=True)) + + self.assertEqual(2, len(results)) + self.assertEqual(ids[0], results[0].id) + self.assertEqual(ids[1], results[1].id) + self.assertEqual( + mock.call('base_path', + headers={'Accept': 'application/json'}, params={}), + self.session.get.mock_calls[0]) + self.assertEqual( + mock.call('https://example.com/next-url', + headers={'Accept': 'application/json'}, params={}), + self.session.get.mock_calls[1]) + self.assertEqual(2, len(self.session.get.call_args_list)) + self.assertIsInstance(results[0], self.test_class) + + def test_list_response_paginated_with_links(self): + ids = [1, 2] + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response.links = {} + mock_response.json.side_effect = [ + { + "resources": [{"id": ids[0]}], + "resources_links": [{ + "href": "https://example.com/next-url", + "rel": "next", + }] + }, { + "resources": [{"id": ids[1]}], + }] + + self.session.get.return_value = mock_response + + results = list(self.sot.list(self.session, paginated=True)) + + self.assertEqual(2, len(results)) + self.assertEqual(ids[0], results[0].id) + self.assertEqual(ids[1], results[1].id) + self.assertEqual( + mock.call('base_path', + headers={'Accept': 'application/json'}, params={}), + self.session.get.mock_calls[0]) + self.assertEqual( + mock.call('https://example.com/next-url', + headers={'Accept': 'application/json'}, params={}), + self.session.get.mock_calls[2]) + self.assertEqual(2, len(self.session.get.call_args_list)) + self.assertIsInstance(results[0], self.test_class) + def test_list_multi_page_response_not_paginated(self): ids = [1, 2] mock_response = mock.Mock() - mock_response.json.side_effect = [[{"id": ids[0]}], - [{"id": ids[1]}]] + mock_response.status_code = 200 + mock_response.json.side_effect = [ + {"resources": [{"id": ids[0]}]}, + {"resources": [{"id": ids[1]}]}, + ] self.session.get.return_value = mock_response @@ -1194,10 +1271,16 @@ class TestResourceActions(base.TestCase): uri_param = "uri param!" mock_response = mock.Mock() - mock_response.json.side_effect = [[{"id": id}], - []] + mock_response.status_code = 200 + mock_response.links = {} + mock_response.json.return_value = {"resources": [{"id": id}]} - self.session.get.return_value = mock_response + mock_empty = mock.Mock() + mock_empty.status_code = 200 + mock_empty.links = {} + mock_empty.json.return_value = {"resources": []} + + self.session.get.side_effect = [mock_response, mock_empty] class Test(self.test_class): _query_mapping = resource2.QueryParameters(query_param=qp_name) @@ -1217,19 +1300,33 @@ class TestResourceActions(base.TestCase): Test.base_path % {"something": uri_param}) def test_list_multi_page_response_paginated(self): - # This tests our ability to stop making calls once - # we've received all of the data. However, this tests - # the case that we always receive full pages of data - # and then the signal that there is no more data - an empty list. - # In this case, we need to make one extra request beyond - # the end of data to ensure we've received it all. ids = [1, 2] resp1 = mock.Mock() - resp1.json.return_value = [{"id": ids[0]}] + resp1.status_code = 200 + resp1.links = {} + resp1.json.return_value = { + "resources": [{"id": ids[0]}], + "resources_links": [{ + "href": "https://example.com/next-url", + "rel": "next", + }], + } resp2 = mock.Mock() - resp2.json.return_value = [{"id": ids[1]}] + resp2.status_code = 200 + resp2.links = {} + resp2.json.return_value = { + "resources": [{"id": ids[1]}], + "resources_links": [{ + "href": "https://example.com/next-url", + "rel": "next", + }], + } resp3 = mock.Mock() - resp3.json.return_value = [] + resp3.status_code = 200 + resp3.links = {} + resp3.json.return_value = { + "resources": [] + } self.session.get.side_effect = [resp1, resp2, resp3] @@ -1245,26 +1342,113 @@ class TestResourceActions(base.TestCase): result1 = next(results) self.assertEqual(result1.id, ids[1]) self.session.get.assert_called_with( - self.base_path, + 'https://example.com/next-url', headers={"Accept": "application/json"}, - params={"limit": 1, "marker": 1}) + params={}) self.assertRaises(StopIteration, next, results) self.session.get.assert_called_with( - self.base_path, + 'https://example.com/next-url', headers={"Accept": "application/json"}, - params={"limit": 1, "marker": 2}) + params={}) def test_list_multi_page_early_termination(self): # This tests our ability to be somewhat smart when evaluating - # the contents of the responses. When we receive a full page - # of data, we can be smart about terminating our responses - # once we see that we've received a page with less data than - # expected, saving one request. + # the contents of the responses. When we request a limit and + # receive less than that limit and there are no next links, + # we can be pretty sure there are no more pages. + ids = [1, 2] + resp1 = mock.Mock() + resp1.status_code = 200 + resp1.json.return_value = { + "resources": [{"id": ids[0]}, {"id": ids[1]}], + } + + self.session.get.return_value = resp1 + + results = self.sot.list(self.session, limit=3, paginated=True) + + result0 = next(results) + self.assertEqual(result0.id, ids[0]) + result1 = next(results) + self.assertEqual(result1.id, ids[1]) + self.session.get.assert_called_with( + self.base_path, + headers={"Accept": "application/json"}, + params={"limit": 3}) + + # Ensure we're done after those two items + self.assertRaises(StopIteration, next, results) + + # Ensure we only made one calls to get this done + self.assertEqual(1, len(self.session.get.call_args_list)) + + def test_list_multi_page_inferred_additional(self): + # If we explicitly request a limit and we receive EXACTLY that + # amount of results and there is no next link, we make one additional + # call to check to see if there are more records and the service is + # just sad. + # NOTE(mordred) In a perfect world we would not do this. But it's 2018 + # and I don't think anyone has any illusions that we live in a perfect + # world anymore. ids = [1, 2, 3] resp1 = mock.Mock() + resp1.status_code = 200 + resp1.links = {} + resp1.json.return_value = { + "resources": [{"id": ids[0]}, {"id": ids[1]}], + } + resp2 = mock.Mock() + resp2.status_code = 200 + resp2.links = {} + resp2.json.return_value = {"resources": [{"id": ids[2]}]} + + self.session.get.side_effect = [resp1, resp2] + + results = self.sot.list(self.session, limit=2, paginated=True) + + # Get the first page's two items + result0 = next(results) + self.assertEqual(result0.id, ids[0]) + result1 = next(results) + self.assertEqual(result1.id, ids[1]) + self.session.get.assert_called_with( + self.base_path, + headers={"Accept": "application/json"}, + params={"limit": 2}) + + result2 = next(results) + self.assertEqual(result2.id, ids[2]) + self.session.get.assert_called_with( + self.base_path, + headers={"Accept": "application/json"}, + params={'limit': 2, 'marker': 2}) + + # Ensure we're done after those three items + self.assertRaises(StopIteration, next, results) + + # Ensure we only made two calls to get this done + self.assertEqual(2, len(self.session.get.call_args_list)) + + def test_list_multi_page_header_count(self): + class Test(self.test_class): + resources_key = None + pagination_key = 'X-Container-Object-Count' + self.sot = Test() + + # Swift returns a total number of objects in a header and we compare + # that against the total number returned to know if we need to fetch + # more objects. + ids = [1, 2, 3] + resp1 = mock.Mock() + resp1.status_code = 200 + resp1.links = {} + resp1.headers = {'X-Container-Object-Count': 3} resp1.json.return_value = [{"id": ids[0]}, {"id": ids[1]}] resp2 = mock.Mock() + resp2.status_code = 200 + resp2.links = {} + resp2.headers = {'X-Container-Object-Count': 3} resp2.json.return_value = [{"id": ids[2]}] self.session.get.side_effect = [resp1, resp2] @@ -1281,13 +1465,58 @@ class TestResourceActions(base.TestCase): headers={"Accept": "application/json"}, params={}) - # Second page only has one item result2 = next(results) self.assertEqual(result2.id, ids[2]) self.session.get.assert_called_with( self.base_path, headers={"Accept": "application/json"}, - params={"limit": 2, "marker": 2}) + params={'marker': 2}) + + # Ensure we're done after those three items + self.assertRaises(StopIteration, next, results) + + # Ensure we only made two calls to get this done + self.assertEqual(2, len(self.session.get.call_args_list)) + + def test_list_multi_page_link_header(self): + # Swift returns a total number of objects in a header and we compare + # that against the total number returned to know if we need to fetch + # more objects. + ids = [1, 2, 3] + resp1 = mock.Mock() + resp1.status_code = 200 + resp1.links = { + 'next': {'uri': 'https://example.com/next-url', 'rel': 'next'}} + resp1.headers = {} + resp1.json.return_value = { + "resources": [{"id": ids[0]}, {"id": ids[1]}], + } + resp2 = mock.Mock() + resp2.status_code = 200 + resp2.links = {} + resp2.headers = {} + resp2.json.return_value = {"resources": [{"id": ids[2]}]} + + self.session.get.side_effect = [resp1, resp2] + + results = self.sot.list(self.session, paginated=True) + + # Get the first page's two items + result0 = next(results) + self.assertEqual(result0.id, ids[0]) + result1 = next(results) + self.assertEqual(result1.id, ids[1]) + self.session.get.assert_called_with( + self.base_path, + headers={"Accept": "application/json"}, + params={}) + + result2 = next(results) + self.assertEqual(result2.id, ids[2]) + self.session.get.assert_called_with( + 'https://example.com/next-url', + headers={"Accept": "application/json"}, + params={}) # Ensure we're done after those three items self.assertRaises(StopIteration, next, results)