Enable streaming responses in download_image

Previously, the openstack.image download_image method would place the
contents of a remote image into a Python variable.  With this change,
the download_image method can optionally return the
requests.Response object returned by session.get(), which permits
the caller to download the image in chunks using the iter_content
method.  This can prevent performance issues when dealing with
large images.

Change-Id: Ie62ebcc895ca893321a10def18ac5d74c7c843b9
This commit is contained in:
Lars Kellogg-Stedman
2017-02-23 14:50:42 -05:00
committed by Brian Curtin
parent f4e4d1496f
commit 5b2df7e724
5 changed files with 95 additions and 9 deletions

View File

@@ -119,7 +119,8 @@ latex_documents = [
]
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/3/': None}
intersphinx_mapping = {'https://docs.python.org/3/': None,
'http://docs.python-requests.org/en/master/': None}
# Include both the class and __init__ docstrings when describing the class
autoclass_content = "both"

View File

@@ -32,6 +32,40 @@ Create an image by uploading its data and setting its attributes.
Full example: `image resource create`_
.. _download_image-stream-true:
Downloading an Image with stream=True
-------------------------------------
As images are often very large pieces of data, storing their entire contents
in the memory of your application can be less than desirable. A more
efficient method may be to iterate over a stream of the response data.
By choosing to stream the response content, you determine the ``chunk_size``
that is appropriate for your needs, meaning only that many bytes of data are
read for each iteration of the loop until all data has been consumed.
See :meth:`requests.Response.iter_content` for more information, as well
as Requests' :ref:`body-content-workflow`.
When you choose to stream an image download, openstacksdk is no longer
able to compute the checksum of the response data for you. This example
shows how you might do that yourself, in a very similar manner to how
the library calculates checksums for non-streamed responses.
.. literalinclude:: ../examples/image/download.py
:pyobject: download_image_stream
Downloading an Image with stream=False
--------------------------------------
If you wish to download an image's entire contents into memory at once,
simply set ``stream=False``, which is the default.
.. literalinclude:: ../examples/image/download.py
:pyobject: download_image
Full example: `image resource download`_
Delete Image
------------
@@ -45,3 +79,4 @@ Full example: `image resource delete`_
.. _image resource create: http://git.openstack.org/cgit/openstack/python-openstacksdk/tree/examples/image/create.py
.. _image resource delete: http://git.openstack.org/cgit/openstack/python-openstacksdk/tree/examples/image/delete.py
.. _image resource list: http://git.openstack.org/cgit/openstack/python-openstacksdk/tree/examples/image/list.py
.. _image resource download: http://git.openstack.org/cgit/openstack/python-openstacksdk/tree/examples/image/download.py

View File

@@ -60,16 +60,40 @@ class Proxy(proxy2.BaseProxy):
return img
def download_image(self, image):
def download_image(self, image, stream=False):
"""Download an image
This will download an image to memory when ``stream=False``, or allow
streaming downloads using an iterator when ``stream=True``.
For examples of working with streamed responses, see
:ref:`download_image-stream-true` and the Requests documentation
:ref:`body-content-workflow`.
:param image: The value can be either the ID of an image or a
:class:`~openstack.image.v2.image.Image` instance.
:returns: The bytes comprising the given Image.
:param bool stream: When ``True``, return a :class:`requests.Response`
instance allowing you to iterate over the
response data stream instead of storing its entire
contents in memory. See
:meth:`requests.Response.iter_content` for more
details. *NOTE*: If you do not consume
the entirety of the response you must explicitly
call :meth:`requests.Response.close` or otherwise
risk inefficiencies with the ``requests``
library's handling of connections.
When ``False``, return the entire
contents of the response.
:returns: The bytes comprising the given Image when stream is
False, otherwise a :class:`requests.Response`
instance.
"""
image = self._get_resource(_image.Image, image)
return image.download(self._session)
return image.download(self._session, stream=stream)
def delete_image(self, image, ignore_missing=True):
"""Delete an image

View File

@@ -246,12 +246,12 @@ class Image(resource2.Resource):
headers={"Content-Type": "application/octet-stream",
"Accept": ""})
def download(self, session):
def download(self, session, stream=False):
"""Download the data contained in an image"""
# TODO(briancurtin): This method should probably offload the get
# operation into another thread or something of that nature.
url = utils.urljoin(self.base_path, self.id, 'file')
resp = session.get(url, endpoint_filter=self.service)
resp = session.get(url, endpoint_filter=self.service, stream=stream)
# See the following bug report for details on why the checksum
# code may sometimes depend on a second GET call.
@@ -265,6 +265,14 @@ class Image(resource2.Resource):
details = self.get(session)
checksum = details.checksum
# if we are returning the response object, ensure that it
# has the content-md5 header so that the caller doesn't
# need to jump through the same hoops through which we
# just jumped.
if stream:
resp.headers['content-md5'] = checksum
return resp
if checksum is not None:
digest = hashlib.md5(resp.content).hexdigest()
if digest != checksum:

View File

@@ -212,7 +212,8 @@ class TestImage(testtools.TestCase):
rv = sot.download(self.sess)
self.sess.get.assert_called_with('images/IDENTIFIER/file',
endpoint_filter=sot.service)
endpoint_filter=sot.service,
stream=False)
self.assertEqual(rv, resp.content)
@@ -242,7 +243,8 @@ class TestImage(testtools.TestCase):
rv = sot.download(self.sess)
self.sess.get.assert_has_calls(
[mock.call('images/IDENTIFIER/file', endpoint_filter=sot.service),
[mock.call('images/IDENTIFIER/file', endpoint_filter=sot.service,
stream=False),
mock.call('images/IDENTIFIER', endpoint_filter=sot.service)])
self.assertEqual(rv, resp1.content)
@@ -270,7 +272,23 @@ class TestImage(testtools.TestCase):
log.records[0].msg)
self.sess.get.assert_has_calls(
[mock.call('images/IDENTIFIER/file', endpoint_filter=sot.service),
[mock.call('images/IDENTIFIER/file', endpoint_filter=sot.service,
stream=False),
mock.call('images/IDENTIFIER', endpoint_filter=sot.service)])
self.assertEqual(rv, resp1.content)
def test_download_stream(self):
sot = image.Image(**EXAMPLE)
resp = mock.Mock()
resp.content = b"abc"
resp.headers = {"Content-MD5": "900150983cd24fb0d6963f7d28e17f72"}
self.sess.get.return_value = resp
rv = sot.download(self.sess, stream=True)
self.sess.get.assert_called_with('images/IDENTIFIER/file',
endpoint_filter=sot.service,
stream=True)
self.assertEqual(rv, resp)