Prefer links dicts for pagination

The current pagination logic makes call to look for additional items at times even if it's not needed, as it is only able to infer when to make an additional call or not if the user is requesting a limit value in their call. However, calls to list() without limit don't do the right thing. If the user does list() with no limit then list will always have to make an unneeded additional call because it has no limit to compare against. This additional call became evident in the next patch when attempting to use sdk primitives in the shade layer and for applications like nodepool would result in unacceptably higher traffic. Luckily, there are mechanisms available that can be checked to see if pagination is needed. Nova, Cinder and other Nova-like projects provide a top level links dict named {resources_key}_links. The api-wg recommendation is for a top level dict to be provided named 'links'. Glance provides a top-level key named 'next'. Swift returns the total number of objects in a container in a header. The first three can be looked for with no additional options but simply by inspecting the payload. The Swift case requires that we add an optional "pagination_key" option to Resource. Finally, we can keep the existing logic that applies when a limit was given and the number of resources returned exactly matches the limit as a heuristic for making an additional call for resources that do not otherwise return next links. Since most resources that support pagination should reutnr a next link, the fallback logic should only rarely be triggered. If anyone encounters a situation with an unacceptable additional call cost that stems from a service that does not return next links, we can use that as motivation to get the service in question fixed. Change-Id: I7a015f05fb43b63805a5aa9f7e266069d5e52438
2017-12-31 11:58:00 -06:00
parent 46135f9b85
commit e068d891ed
4 changed files with 354 additions and 54 deletions
--- a/openstack/resource2.py
+++ b/openstack/resource2.py
@@ -216,6 +216,8 @@ class Resource(object):
    resource_key = None
    #: Plural form of key for resource.
    resources_key = None
+    #: Key used for pagination links
+    pagination_key = None

    #: The ID of this resource.
    id = Body("id")
@@ -726,45 +728,111 @@ class Resource(object):
        if not cls.allow_list:
            raise exceptions.MethodNotSupported(cls, "list")

-        more_data = True
        query_params = cls._query_mapping._transpose(params)
        uri = cls.base_path % params

-        while more_data:
-            resp = session.get(uri,
-                               headers={"Accept": "application/json"},
-                               params=query_params)
-            resp = resp.json()
+        limit = query_params.get('limit')
+
+        # Track the total number of resources yielded so we can paginate
+        # swift objects
+        total_yielded = 0
+        while uri:
+            # Copy query_params due to weird mock unittest interactions
+            response = session.get(
+                uri,
+                headers={"Accept": "application/json"},
+                params=query_params.copy())
+            exceptions.raise_from_response(response)
+            data = response.json()
+
+            # Discard any existing pagination keys
+            query_params.pop('marker', None)
+            query_params.pop('limit', None)
+
            if cls.resources_key:
-                resp = resp[cls.resources_key]
+                resources = data[cls.resources_key]
+            else:
+                resources = data

-            if not resp:
-                more_data = False
+            if not isinstance(resources, list):
+                resources = [resources]

-            # Keep track of how many items we've yielded. If we yielded
-            # less than our limit, we don't need to do an extra request
-            # to get back an empty data set, which acts as a sentinel.
+            # Keep track of how many items we've yielded. The server should
+            # handle this, but it's easy for us to as well.
            yielded = 0
-            new_marker = None
-            for data in resp:
+            marker = None
+            for raw_resource in resources:
                # Do not allow keys called "self" through. Glance chose
                # to name a key "self", so we need to pop it out because
                # we can't send it through cls.existing and into the
                # Resource initializer. "self" is already the first
                # argument and is practically a reserved word.
-                data.pop("self", None)
+                raw_resource.pop("self", None)

-                value = cls.existing(**data)
-                new_marker = value.id
-                yielded += 1
+                value = cls.existing(**raw_resource)
+                marker = value.id
                yield value
+                yielded += 1
+                total_yielded += 1

-            if not paginated:
+            # If a limit was given by the user and we have not returned
+            # as many records as the limit, then it stands to reason that
+            # there are no more records to return and we don't need to do
+            # anything else.
+            if limit and yielded < limit:
                return
-            if "limit" in query_params and yielded < query_params["limit"]:
+
+            if resources and paginated:
+                uri, next_params = cls._get_next_link(
+                    uri, response, data, marker, limit, total_yielded)
+                query_params.update(next_params)
+            else:
                return
-            query_params["limit"] = yielded
-            query_params["marker"] = new_marker
+
+    @classmethod
+    def _get_next_link(cls, uri, response, data, marker, limit, total_yielded):
+        next_link = None
+        params = {}
+        if isinstance(data, dict):
+            pagination_key = cls.pagination_key
+
+            if not pagination_key and 'links' in data:
+                # api-wg guidelines are for a links dict in the main body
+                pagination_key == 'links'
+            if not pagination_key and cls.resources_key:
+                # Nova has a {key}_links dict in the main body
+                pagination_key = '{key}_links'.format(key=cls.resources_key)
+            if pagination_key:
+                links = data.get(pagination_key, {})
+                for item in links:
+                    if item.get('rel') == 'next' and 'href' in item:
+                        next_link = item['href']
+                        break
+            # Glance has a next field in the main body
+            next_link = next_link or data.get('next')
+        if not next_link and 'next' in response.links:
+            # RFC5988 specifies Link headers and requests parses them if they
+            # are there. We prefer link dicts in resource body, but if those
+            # aren't there and Link headers are, use them.
+            next_link = response.links['next']['uri']
+        # Swift provides a count of resources in a header and a list body
+        if not next_link and cls.pagination_key:
+            total_count = response.headers.get(cls.pagination_key)
+            if total_count:
+                total_count = int(total_count)
+                if total_count > total_yielded:
+                    params['marker'] = marker
+                    if limit:
+                        params['limit'] = limit
+                    next_link = uri
+        # If we still have no link, and limit was given and is non-zero,
+        # and the number of records yielded equals the limit, then the user
+        # is playing pagination ball so we should go ahead and try once more.
+        if not next_link and limit:
+            next_link = uri
+            params['marker'] = marker
+            params['limit'] = limit
+        return next_link, params

    @classmethod
    def _get_one_match(cls, name_or_id, results):
--- a/openstack/tests/unit/meter/v2/test_sample.py
+++ b/openstack/tests/unit/meter/v2/test_sample.py
@@ -65,6 +65,7 @@ class TestSample(testtools.TestCase):
        sess = mock.Mock()
        resp = mock.Mock()
        resp.json = mock.Mock(return_value=[SAMPLE])
+        resp.status_code = 200
        sess.get = mock.Mock(return_value=resp)

        found = sample.Sample.list(sess, counter_name='name_of_meter')
--- a/openstack/tests/unit/network/v2/test_floating_ip.py
+++ b/openstack/tests/unit/network/v2/test_floating_ip.py
@@ -73,6 +73,7 @@ class TestFloatingIP(testtools.TestCase):
        fake_response = mock.Mock()
        body = {floating_ip.FloatingIP.resources_key: [data]}
        fake_response.json = mock.Mock(return_value=body)
+        fake_response.status_code = 200
        mock_session.get = mock.Mock(return_value=fake_response)

        result = floating_ip.FloatingIP.find_available(mock_session)
@@ -88,6 +89,7 @@ class TestFloatingIP(testtools.TestCase):
        fake_response = mock.Mock()
        body = {floating_ip.FloatingIP.resources_key: []}
        fake_response.json = mock.Mock(return_value=body)
+        fake_response.status_code = 200
        mock_session.get = mock.Mock(return_value=fake_response)

        self.assertIsNone(floating_ip.FloatingIP.find_available(mock_session))
--- a/openstack/tests/unit/test_resource2.py
+++ b/openstack/tests/unit/test_resource2.py
@@ -938,6 +938,7 @@ class TestResourceActions(base.TestCase):
        class Test(resource2.Resource):
            service = self.service_name
            base_path = self.base_path
+            resources_key = 'resources'
            allow_create = True
            allow_get = True
            allow_head = True
@@ -1096,7 +1097,9 @@ class TestResourceActions(base.TestCase):
    # the generator. Wrap calls to self.sot.list in a `list`
    # and then test the results as a list of responses.
    def test_list_empty_response(self):
-        mock_response = FakeResponse([])
+        mock_response = mock.Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"resources": []}

        self.session.get.return_value = mock_response

@@ -1112,8 +1115,9 @@ class TestResourceActions(base.TestCase):
    def test_list_one_page_response_paginated(self):
        id_value = 1
        mock_response = mock.Mock()
-        mock_response.json.side_effect = [[{"id": id_value}],
-                                          []]
+        mock_response.status_code = 200
+        mock_response.links = {}
+        mock_response.json.return_value = {"resources": [{"id": id_value}]}

        self.session.get.return_value = mock_response

@@ -1123,17 +1127,15 @@ class TestResourceActions(base.TestCase):

        self.assertEqual(1, len(results))

-        # Look at the `params` argument to each of the get calls that
-        # were made.
-        self.session.get.call_args_list[0][1]["params"] = {}
-        self.session.get.call_args_list[1][1]["params"] = {"marker": id_value}
+        self.assertEqual(1, len(self.session.get.call_args_list))
        self.assertEqual(id_value, results[0].id)
        self.assertIsInstance(results[0], self.test_class)

    def test_list_one_page_response_not_paginated(self):
        id_value = 1
        mock_response = mock.Mock()
-        mock_response.json.return_value = [{"id": id_value}]
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"resources": [{"id": id_value}]}

        self.session.get.return_value = mock_response

@@ -1156,6 +1158,7 @@ class TestResourceActions(base.TestCase):

        id_value = 1
        mock_response = mock.Mock()
+        mock_response.status_code = 200
        mock_response.json.return_value = {key: [{"id": id_value}]}

        self.session.get.return_value = mock_response
@@ -1173,11 +1176,85 @@ class TestResourceActions(base.TestCase):
        self.assertEqual(id_value, results[0].id)
        self.assertIsInstance(results[0], self.test_class)

+    def test_list_response_paginated_without_links(self):
+        ids = [1, 2]
+        mock_response = mock.Mock()
+        mock_response.status_code = 200
+        mock_response.links = {}
+        mock_response.json.return_value = {
+            "resources": [{"id": ids[0]}],
+            "resources_links": [{
+                "href": "https://example.com/next-url",
+                "rel": "next",
+            }]
+        }
+        mock_response2 = mock.Mock()
+        mock_response2.status_code = 200
+        mock_response2.links = {}
+        mock_response2.json.return_value = {
+            "resources": [{"id": ids[1]}],
+        }
+
+        self.session.get.side_effect = [mock_response, mock_response2]
+
+        results = list(self.sot.list(self.session, paginated=True))
+
+        self.assertEqual(2, len(results))
+        self.assertEqual(ids[0], results[0].id)
+        self.assertEqual(ids[1], results[1].id)
+        self.assertEqual(
+            mock.call('base_path',
+                      headers={'Accept': 'application/json'}, params={}),
+            self.session.get.mock_calls[0])
+        self.assertEqual(
+            mock.call('https://example.com/next-url',
+                      headers={'Accept': 'application/json'}, params={}),
+            self.session.get.mock_calls[1])
+        self.assertEqual(2, len(self.session.get.call_args_list))
+        self.assertIsInstance(results[0], self.test_class)
+
+    def test_list_response_paginated_with_links(self):
+        ids = [1, 2]
+        mock_response = mock.Mock()
+        mock_response.status_code = 200
+        mock_response.links = {}
+        mock_response.json.side_effect = [
+            {
+                "resources": [{"id": ids[0]}],
+                "resources_links": [{
+                    "href": "https://example.com/next-url",
+                    "rel": "next",
+                }]
+            }, {
+                "resources": [{"id": ids[1]}],
+            }]
+
+        self.session.get.return_value = mock_response
+
+        results = list(self.sot.list(self.session, paginated=True))
+
+        self.assertEqual(2, len(results))
+        self.assertEqual(ids[0], results[0].id)
+        self.assertEqual(ids[1], results[1].id)
+        self.assertEqual(
+            mock.call('base_path',
+                      headers={'Accept': 'application/json'}, params={}),
+            self.session.get.mock_calls[0])
+        self.assertEqual(
+            mock.call('https://example.com/next-url',
+                      headers={'Accept': 'application/json'}, params={}),
+            self.session.get.mock_calls[2])
+        self.assertEqual(2, len(self.session.get.call_args_list))
+        self.assertIsInstance(results[0], self.test_class)
+
    def test_list_multi_page_response_not_paginated(self):
        ids = [1, 2]
        mock_response = mock.Mock()
-        mock_response.json.side_effect = [[{"id": ids[0]}],
-                                          [{"id": ids[1]}]]
+        mock_response.status_code = 200
+        mock_response.json.side_effect = [
+            {"resources": [{"id": ids[0]}]},
+            {"resources": [{"id": ids[1]}]},
+        ]

        self.session.get.return_value = mock_response

@@ -1194,10 +1271,16 @@ class TestResourceActions(base.TestCase):
        uri_param = "uri param!"

        mock_response = mock.Mock()
-        mock_response.json.side_effect = [[{"id": id}],
-                                          []]
+        mock_response.status_code = 200
+        mock_response.links = {}
+        mock_response.json.return_value = {"resources": [{"id": id}]}

-        self.session.get.return_value = mock_response
+        mock_empty = mock.Mock()
+        mock_empty.status_code = 200
+        mock_empty.links = {}
+        mock_empty.json.return_value = {"resources": []}
+
+        self.session.get.side_effect = [mock_response, mock_empty]

        class Test(self.test_class):
            _query_mapping = resource2.QueryParameters(query_param=qp_name)
@@ -1217,19 +1300,33 @@ class TestResourceActions(base.TestCase):
                         Test.base_path % {"something": uri_param})

    def test_list_multi_page_response_paginated(self):
-        # This tests our ability to stop making calls once
-        # we've received all of the data. However, this tests
-        # the case that we always receive full pages of data
-        # and then the signal that there is no more data - an empty list.
-        # In this case, we need to make one extra request beyond
-        # the end of data to ensure we've received it all.
        ids = [1, 2]
        resp1 = mock.Mock()
-        resp1.json.return_value = [{"id": ids[0]}]
+        resp1.status_code = 200
+        resp1.links = {}
+        resp1.json.return_value = {
+            "resources": [{"id": ids[0]}],
+            "resources_links": [{
+                "href": "https://example.com/next-url",
+                "rel": "next",
+            }],
+        }
        resp2 = mock.Mock()
-        resp2.json.return_value = [{"id": ids[1]}]
+        resp2.status_code = 200
+        resp2.links = {}
+        resp2.json.return_value = {
+            "resources": [{"id": ids[1]}],
+            "resources_links": [{
+                "href": "https://example.com/next-url",
+                "rel": "next",
+            }],
+        }
        resp3 = mock.Mock()
-        resp3.json.return_value = []
+        resp3.status_code = 200
+        resp3.links = {}
+        resp3.json.return_value = {
+            "resources": []
+        }

        self.session.get.side_effect = [resp1, resp2, resp3]

@@ -1245,26 +1342,113 @@ class TestResourceActions(base.TestCase):
        result1 = next(results)
        self.assertEqual(result1.id, ids[1])
        self.session.get.assert_called_with(
-            self.base_path,
+            'https://example.com/next-url',
            headers={"Accept": "application/json"},
-            params={"limit": 1, "marker": 1})
+            params={})

        self.assertRaises(StopIteration, next, results)
        self.session.get.assert_called_with(
-            self.base_path,
+            'https://example.com/next-url',
            headers={"Accept": "application/json"},
-            params={"limit": 1, "marker": 2})
+            params={})

    def test_list_multi_page_early_termination(self):
        # This tests our ability to be somewhat smart when evaluating
-        # the contents of the responses. When we receive a full page
-        # of data, we can be smart about terminating our responses
-        # once we see that we've received a page with less data than
-        # expected, saving one request.
+        # the contents of the responses. When we request a limit and
+        # receive less than that limit and there are no next links,
+        # we can be pretty sure there are no more pages.
+        ids = [1, 2]
+        resp1 = mock.Mock()
+        resp1.status_code = 200
+        resp1.json.return_value = {
+            "resources": [{"id": ids[0]}, {"id": ids[1]}],
+        }
+
+        self.session.get.return_value = resp1
+
+        results = self.sot.list(self.session, limit=3, paginated=True)
+
+        result0 = next(results)
+        self.assertEqual(result0.id, ids[0])
+        result1 = next(results)
+        self.assertEqual(result1.id, ids[1])
+        self.session.get.assert_called_with(
+            self.base_path,
+            headers={"Accept": "application/json"},
+            params={"limit": 3})
+
+        # Ensure we're done after those two items
+        self.assertRaises(StopIteration, next, results)
+
+        # Ensure we only made one calls to get this done
+        self.assertEqual(1, len(self.session.get.call_args_list))
+
+    def test_list_multi_page_inferred_additional(self):
+        # If we explicitly request a limit and we receive EXACTLY that
+        # amount of results and there is no next link, we make one additional
+        # call to check to see if there are more records and the service is
+        # just sad.
+        # NOTE(mordred) In a perfect world we would not do this. But it's 2018
+        # and I don't think anyone has any illusions that we live in a perfect
+        # world anymore.
        ids = [1, 2, 3]
        resp1 = mock.Mock()
+        resp1.status_code = 200
+        resp1.links = {}
+        resp1.json.return_value = {
+            "resources": [{"id": ids[0]}, {"id": ids[1]}],
+        }
+        resp2 = mock.Mock()
+        resp2.status_code = 200
+        resp2.links = {}
+        resp2.json.return_value = {"resources": [{"id": ids[2]}]}
+
+        self.session.get.side_effect = [resp1, resp2]
+
+        results = self.sot.list(self.session, limit=2, paginated=True)
+
+        # Get the first page's two items
+        result0 = next(results)
+        self.assertEqual(result0.id, ids[0])
+        result1 = next(results)
+        self.assertEqual(result1.id, ids[1])
+        self.session.get.assert_called_with(
+            self.base_path,
+            headers={"Accept": "application/json"},
+            params={"limit": 2})
+
+        result2 = next(results)
+        self.assertEqual(result2.id, ids[2])
+        self.session.get.assert_called_with(
+            self.base_path,
+            headers={"Accept": "application/json"},
+            params={'limit': 2, 'marker': 2})
+
+        # Ensure we're done after those three items
+        self.assertRaises(StopIteration, next, results)
+
+        # Ensure we only made two calls to get this done
+        self.assertEqual(2, len(self.session.get.call_args_list))
+
+    def test_list_multi_page_header_count(self):
+        class Test(self.test_class):
+            resources_key = None
+            pagination_key = 'X-Container-Object-Count'
+        self.sot = Test()
+
+        # Swift returns a total number of objects in a header and we compare
+        # that against the total number returned to know if we need to fetch
+        # more objects.
+        ids = [1, 2, 3]
+        resp1 = mock.Mock()
+        resp1.status_code = 200
+        resp1.links = {}
+        resp1.headers = {'X-Container-Object-Count': 3}
        resp1.json.return_value = [{"id": ids[0]}, {"id": ids[1]}]
        resp2 = mock.Mock()
+        resp2.status_code = 200
+        resp2.links = {}
+        resp2.headers = {'X-Container-Object-Count': 3}
        resp2.json.return_value = [{"id": ids[2]}]

        self.session.get.side_effect = [resp1, resp2]
@@ -1281,13 +1465,58 @@ class TestResourceActions(base.TestCase):
            headers={"Accept": "application/json"},
            params={})

-        # Second page only has one item
        result2 = next(results)
        self.assertEqual(result2.id, ids[2])
        self.session.get.assert_called_with(
            self.base_path,
            headers={"Accept": "application/json"},
-            params={"limit": 2, "marker": 2})
+            params={'marker': 2})
+
+        # Ensure we're done after those three items
+        self.assertRaises(StopIteration, next, results)
+
+        # Ensure we only made two calls to get this done
+        self.assertEqual(2, len(self.session.get.call_args_list))
+
+    def test_list_multi_page_link_header(self):
+        # Swift returns a total number of objects in a header and we compare
+        # that against the total number returned to know if we need to fetch
+        # more objects.
+        ids = [1, 2, 3]
+        resp1 = mock.Mock()
+        resp1.status_code = 200
+        resp1.links = {
+            'next': {'uri': 'https://example.com/next-url', 'rel': 'next'}}
+        resp1.headers = {}
+        resp1.json.return_value = {
+            "resources": [{"id": ids[0]}, {"id": ids[1]}],
+        }
+        resp2 = mock.Mock()
+        resp2.status_code = 200
+        resp2.links = {}
+        resp2.headers = {}
+        resp2.json.return_value = {"resources": [{"id": ids[2]}]}
+
+        self.session.get.side_effect = [resp1, resp2]
+
+        results = self.sot.list(self.session, paginated=True)
+
+        # Get the first page's two items
+        result0 = next(results)
+        self.assertEqual(result0.id, ids[0])
+        result1 = next(results)
+        self.assertEqual(result1.id, ids[1])
+        self.session.get.assert_called_with(
+            self.base_path,
+            headers={"Accept": "application/json"},
+            params={})
+
+        result2 = next(results)
+        self.assertEqual(result2.id, ids[2])
+        self.session.get.assert_called_with(
+            'https://example.com/next-url',
+            headers={"Accept": "application/json"},
+            params={})

        # Ensure we're done after those three items
        self.assertRaises(StopIteration, next, results)