resource: Fix pagination of nested Glance resources

This is a simple off-by-one error, but there are lots of things feeding
into this that I figured were worth explaining. Apologies in advance for
the length of this screed. Hopefully it's useful for someone.

There's a small bug in how we paginate when listing resources, or rather
when listing Glance objects. Glance doesn't return a 'Links' response
header or 'links' response body field like most other OpenStack
services. Instead, it returns a simple 'next' body field that is
relative. For example, consider the paginated response to the '/images'
API.

  {
    "images": [ ... ],
    "first":"/v2/images",
    "schema":"/v2/schemas/images",
    "next":"/v2/images?marker=823762fb-3128-43cf-8136-cee6b749f708"
  }

As a result, we're forced to treat Glance as yet another "special"
service when it comes to pagination. The first step of this is to look
for the 'next' field and grab the value. As you can see, this value
also includes the version prefix, which we don't want this since
keystoneauth1 already appends the version prefix when generating
absolute URLs from information in the service catalog. This means our
next step is to strip this version filter, which we have done since
change Iad8e69d20783cfe59c3e86bd6980da7dee802869. Unfortunately, the
original author of that change was a little too liberal in their
stripping (heh) and also removed the leading new slash. This causes
issues with the final part of our generation of the next link, namely
the deduplication of query parameters. Because most servers enforce an
upper limit on the size of URLs, it's important that we don't constantly
append our query parameters to the request URL for each page. Instead,
we parse the query parameters present in the 'next' URL and munge them
with our our parameters. The way we do this is using
'urllib.parse.urljoin' method with this pattern:

  urljoin(url, urlparse(url).path)

For example:

  >>> url = 'http://example.com/foo/bar?pet=dog&name=baxter'
  >>> urljoin(url, urlparse(url).path)
  'http://example.com/foo/bar'
  >>> url = '/foo/bar?pet=dog&name=baxter'
  >>> urljoin(url, urlparse(url).path)
  '/foo/bar'

Because we were stripping the leading '/', the first part of the path
was being identified as the server and was stripped.

  >>> url = 'foo/bar?pet=dog&name=baxter'
  >>> urljoin(url, urlparse(url).path)
  'foo/foo/bar'

After all that, the solution is simple. Stop stripping the leading
slash and generate a proper relative URL, as expected.

Change-Id: Ic9e1891ae7171a0cc45dd9faf1b2a6e37535b777
Co-authored-by: Stephen Finucane <stephenfin@redhat.com>
Story: 2010273
Task: 46200
This commit is contained in:
Areg Grigoryan
2022-08-18 15:21:24 +02:00
committed by Stephen Finucane
parent 9fa6603d4e
commit 0529c30ae2
2 changed files with 67 additions and 1 deletions

View File

@@ -2054,34 +2054,40 @@ class Resource(dict):
def _get_next_link(cls, uri, response, data, marker, limit, total_yielded):
next_link = None
params = {}
if isinstance(data, dict):
pagination_key = cls.pagination_key
if not pagination_key and 'links' in data:
# api-wg guidelines are for a links dict in the main body
pagination_key = 'links'
if not pagination_key and cls.resources_key:
# Nova has a {key}_links dict in the main body
pagination_key = '{key}_links'.format(key=cls.resources_key)
if pagination_key:
links = data.get(pagination_key, {})
# keystone might return a dict
if isinstance(links, dict):
links = ({k: v} for k, v in links.items())
for item in links:
if item.get('rel') == 'next' and 'href' in item:
next_link = item['href']
break
# Glance has a next field in the main body
next_link = next_link or data.get('next')
if next_link and next_link.startswith('/v'):
next_link = next_link[next_link.find('/', 1) + 1 :]
next_link = next_link[next_link.find('/', 1):]
if not next_link and 'next' in response.links:
# RFC5988 specifies Link headers and requests parses them if they
# are there. We prefer link dicts in resource body, but if those
# aren't there and Link headers are, use them.
next_link = response.links['next']['uri']
# Swift provides a count of resources in a header and a list body
if not next_link and cls.pagination_key:
total_count = response.headers.get(cls.pagination_key)
@@ -2109,6 +2115,7 @@ class Resource(dict):
next_link = uri
params['marker'] = marker
params['limit'] = limit
return next_link, params
@classmethod

View File

@@ -2205,6 +2205,65 @@ class TestResourceActions(base.TestCase):
self.assertEqual(3, len(self.session.get.call_args_list))
self.assertIsInstance(results[0], self.test_class)
def test_list_response_paginated_with_next_field(self):
"""Test pagination with a 'next' field in the response.
Glance doesn't return a 'links' field in the response. Instead, it
returns a 'first' field and, if there are more pages, a 'next' field in
the response body. Ensure we correctly parse these.
"""
class Test(resource.Resource):
service = self.service_name
base_path = '/foos/bars'
resources_key = 'bars'
allow_list = True
_query_mapping = resource.QueryParameters("wow")
ids = [1, 2]
mock_response = mock.Mock()
mock_response.status_code = 200
mock_response.links = {}
mock_response.json.side_effect = [
{
"bars": [{"id": ids[0]}],
"first": "/v2/foos/bars?wow=cool",
"next": "/v2/foos/bars?marker=baz&wow=cool",
},
{
"bars": [{"id": ids[1]}],
"first": "/v2/foos/bars?wow=cool",
},
]
self.session.get.return_value = mock_response
results = list(Test.list(self.session, paginated=True, wow="cool"))
self.assertEqual(2, len(results))
self.assertEqual(ids[0], results[0].id)
self.assertEqual(ids[1], results[1].id)
self.assertEqual(
mock.call(
Test.base_path,
headers={'Accept': 'application/json'},
params={'wow': 'cool'},
microversion=None,
),
self.session.get.mock_calls[0]
)
self.assertEqual(
mock.call(
'/foos/bars',
headers={'Accept': 'application/json'},
params={'wow': ['cool'], 'marker': ['baz']},
microversion=None,
),
self.session.get.mock_calls[2],
)
self.assertEqual(2, len(self.session.get.call_args_list))
self.assertIsInstance(results[0], Test)
def test_list_response_paginated_with_microversions(self):
class Test(resource.Resource):
service = self.service_name