From b0197656684e3fa8445b859434fe29ba44499fa1 Mon Sep 17 00:00:00 2001
From: Dan Smith
Date: Fri, 31 Jul 2020 11:02:15 -0700
Subject: [PATCH] Inspect upload/import stream and set virtual_size

If we are processing a disk_format that we know about, wrap the data
pipeline with the format inspector and set virtual_size after upload
is complete.

Related to blueprint calculate-virtual-size

Change-Id: I25cd3cde94fefaa5d8ac72f10a075fe34a5df7bf
---
 glance/location.py                        |  24 +++++
 glance/tests/functional/v2/test_images.py | 101 ++++++++++++++++++++++
 glance/tests/unit/test_store_image.py     |  61 +++++++++++--
 glance/tests/unit/utils.py                |  14 +++
 4 files changed, 194 insertions(+), 6 deletions(-)

diff --git a/glance/location.py b/glance/location.py
index 42027f5794..3c20cf86b7 100644
--- a/glance/location.py
+++ b/glance/location.py
@@ -27,6 +27,7 @@ from oslo_utils import encodeutils
 from oslo_utils import excutils
 
 from glance.common import exception
+from glance.common import format_inspector
 from glance.common import utils
 import glance.domain.proxy
 from glance.i18n import _, _LE, _LI, _LW
@@ -550,7 +551,30 @@ class ImageProxy(glance.domain.proxy.Image):
                 img_signature_key_type=key_type
             )
 
+        if not self.image.virtual_size:
+            inspector = format_inspector.get_inspector(self.image.disk_format)
+        else:
+            # No need to do this again
+            inspector = None
+
+        if inspector and self.image.container_format == 'bare':
+            fmt = inspector()
+            data = format_inspector.InfoWrapper(data, fmt)
+            LOG.debug('Enabling in-flight format inspection for %s', fmt)
+        else:
+            fmt = None
+
         self._upload_to_store(data, verifier, backend, size)
+
+        if fmt and fmt.format_match and fmt.virtual_size:
+            self.image.virtual_size = fmt.virtual_size
+            LOG.info('Image format matched and virtual size computed: %i',
+                     self.image.virtual_size)
+        elif fmt:
+            LOG.warning('Image format %s did not match; '
+                        'unable to calculate virtual size',
+                        self.image.disk_format)
+
         if set_active and self.image.status != 'active':
             self.image.status = 'active'
 
diff --git a/glance/tests/functional/v2/test_images.py b/glance/tests/functional/v2/test_images.py
index d2c667ffc1..987a46bf9b 100644
--- a/glance/tests/functional/v2/test_images.py
+++ b/glance/tests/functional/v2/test_images.py
@@ -15,9 +15,12 @@
 import hashlib
 import os
+import subprocess
+import tempfile
 import uuid
 
 from oslo_serialization import jsonutils
+from oslo_utils import units
 import requests
 import six
 from six.moves import http_client as http
@@ -889,6 +892,104 @@ class TestImages(functional.FunctionalTest):
 
         self.stop_servers()
 
+    def _create_qcow(self, size):
+        fn = tempfile.mktemp(prefix='glance-unittest-images-',
+                             suffix='.qcow')
+        subprocess.check_output(
+            'qemu-img create -f qcow %s %i' % (fn, size),
+            shell=True)
+        return fn
+
+    def test_image_upload_qcow_virtual_size_calculation(self):
+        self.start_servers(**self.__dict__.copy())
+
+        # Create an image
+        headers = self._headers({'Content-Type': 'application/json'})
+        data = jsonutils.dumps({'name': 'myqcow', 'disk_format': 'qcow2',
+                                'container_format': 'bare'})
+        response = requests.post(self._url('/v2/images'),
+                                 headers=headers, data=data)
+        self.assertEqual(http.CREATED, response.status_code,
+                         'Failed to create: %s' % response.text)
+        image = response.json()
+
+        # Upload a qcow
+        fn = self._create_qcow(128 * units.Mi)
+        raw_size = os.path.getsize(fn)
+        headers = self._headers({'Content-Type': 'application/octet-stream'})
+        response = requests.put(self._url('/v2/images/%s/file' % image['id']),
+                                headers=headers,
+                                data=open(fn, 'rb').read())
+        os.remove(fn)
+        self.assertEqual(http.NO_CONTENT, response.status_code)
+
+        # Check the image attributes
+        response = requests.get(self._url('/v2/images/%s' % image['id']),
+                                headers=self._headers())
+        self.assertEqual(http.OK, response.status_code)
+        image = response.json()
+        self.assertEqual(128 * units.Mi, image['virtual_size'])
+        self.assertEqual(raw_size, image['size'])
+
+    def test_image_import_qcow_virtual_size_calculation(self):
+        self.start_servers(**self.__dict__.copy())
+
+        # Create an image
+        headers = self._headers({'Content-Type': 'application/json'})
+        data = jsonutils.dumps({'name': 'myqcow', 'disk_format': 'qcow2',
+                                'container_format': 'bare'})
+        response = requests.post(self._url('/v2/images'),
+                                 headers=headers, data=data)
+        self.assertEqual(http.CREATED, response.status_code,
+                         'Failed to create: %s' % response.text)
+        image = response.json()
+
+        # Stage a qcow
+        fn = self._create_qcow(128 * units.Mi)
+        raw_size = os.path.getsize(fn)
+        headers = self._headers({'Content-Type': 'application/octet-stream'})
+        response = requests.put(self._url('/v2/images/%s/stage' % image['id']),
+                                headers=headers,
+                                data=open(fn, 'rb').read())
+        os.remove(fn)
+        self.assertEqual(http.NO_CONTENT, response.status_code)
+
+        # Verify image is in uploading state and checksum is None
+        func_utils.verify_image_hashes_and_status(self, image['id'],
+                                                  status='uploading')
+
+        # Import image to store
+        path = self._url('/v2/images/%s/import' % image['id'])
+        headers = self._headers({
+            'content-type': 'application/json',
+            'X-Roles': 'admin',
+        })
+        data = jsonutils.dumps({'method': {
+            'name': 'glance-direct'
+        }})
+        response = requests.post(
+            self._url('/v2/images/%s/import' % image['id']),
+            headers=headers, data=data)
+        self.assertEqual(http.ACCEPTED, response.status_code)
+
+        # Verify image is in active state and checksum is set
+        # NOTE(abhishekk): As import is an async call we need to allow
+        # some time for the call to complete.
+        path = self._url('/v2/images/%s' % image['id'])
+        func_utils.wait_for_status(request_path=path,
+                                   request_headers=self._headers(),
+                                   status='active',
+                                   max_sec=15,
+                                   delay_sec=0.2)
+
+        # Check the image attributes
+        response = requests.get(self._url('/v2/images/%s' % image['id']),
+                                headers=self._headers())
+        self.assertEqual(http.OK, response.status_code)
+        image = response.json()
+        self.assertEqual(128 * units.Mi, image['virtual_size'])
+        self.assertEqual(raw_size, image['size'])
+
     def test_hidden_images(self):
         # Image list should be empty
         self.api_server.show_multiple_locations = True
diff --git a/glance/tests/unit/test_store_image.py b/glance/tests/unit/test_store_image.py
index 629f7be474..8629ab66a9 100644
--- a/glance/tests/unit/test_store_image.py
+++ b/glance/tests/unit/test_store_image.py
@@ -54,6 +54,9 @@ class ImageStub(object):
         self.os_hash_algo = None
         self.os_hash_value = None
         self.checksum = None
+        self.disk_format = 'raw'
+        self.container_format = 'bare'
+        self.virtual_size = 0
 
     def delete(self):
         self.status = 'deleted'
@@ -110,6 +113,7 @@ class TestStoreMultiBackends(utils.BaseTestCase):
         }
         image_stub = ImageStub(UUID2, status='queued', locations=[],
                                extra_properties=extra_properties)
+        image_stub.disk_format = 'iso'
         image = glance.location.ImageProxy(image_stub, context,
                                            self.store_api, self.store_utils)
         with mock.patch.object(image, "_upload_to_store") as mloc:
@@ -237,9 +241,10 @@ class TestStoreImage(utils.BaseTestCase):
         self.mock_object(unit_test_utils.FakeStoreAPI, 'get_from_backend',
                          fake_get_from_backend)
         # This time, image1.get_data() returns the data wrapped in a
-        # LimitingReader|CooperativeReader pipeline, so peeking under
-        # the hood of those objects to get at the underlying string.
-        self.assertEqual('ZZZ', image1.get_data().data.fd)
+        # LimitingReader|CooperativeReader|InfoWrapper pipeline, so
+        # peeking under the hood of those objects to get at the
+        # underlying string.
+        self.assertEqual('ZZZ', image1.get_data().data.fd._source)
 
         image1.locations.pop(0)
         self.assertEqual(1, len(image1.locations))
@@ -248,14 +253,57 @@
     def test_image_set_data(self):
         context = glance.context.RequestContext(user=USER1)
         image_stub = ImageStub(UUID2, status='queued', locations=[])
+        # We are going to pass an iterable data source, so use the
+        # FakeStoreAPIReader that actually reads from that data
+        store_api = unit_test_utils.FakeStoreAPIReader()
         image = glance.location.ImageProxy(image_stub, context,
-                                           self.store_api, self.store_utils)
-        image.set_data('YYYY', 4)
+                                           store_api, self.store_utils)
+        image.set_data(iter(['YYYY']), 4)
         self.assertEqual(4, image.size)
         # NOTE(markwash): FakeStore returns image_id for location
         self.assertEqual(UUID2, image.locations[0]['url'])
         self.assertEqual('Z', image.checksum)
         self.assertEqual('active', image.status)
+        self.assertEqual(4, image.virtual_size)
+
+    def test_image_set_data_inspector_no_match(self):
+        context = glance.context.RequestContext(user=USER1)
+        image_stub = ImageStub(UUID2, status='queued', locations=[])
+        image_stub.disk_format = 'qcow2'
+        # We are going to pass an iterable data source, so use the
+        # FakeStoreAPIReader that actually reads from that data
+        store_api = unit_test_utils.FakeStoreAPIReader()
+        image = glance.location.ImageProxy(image_stub, context,
+                                           store_api, self.store_utils)
+        image.set_data(iter(['YYYY']), 4)
+        self.assertEqual(4, image.size)
+        # NOTE(markwash): FakeStore returns image_id for location
+        self.assertEqual(UUID2, image.locations[0]['url'])
+        self.assertEqual('Z', image.checksum)
+        self.assertEqual('active', image.status)
+        self.assertEqual(0, image.virtual_size)
+
+    @mock.patch('glance.common.format_inspector.get_inspector')
+    def test_image_set_data_inspector_not_needed(self, mock_gi):
+        context = glance.context.RequestContext(user=USER1)
+        image_stub = ImageStub(UUID2, status='queued', locations=[])
+        image_stub.virtual_size = 123
+        image_stub.disk_format = 'qcow2'
+        # We are going to pass an iterable data source, so use the
+        # FakeStoreAPIReader that actually reads from that data
+        store_api = unit_test_utils.FakeStoreAPIReader()
+        image = glance.location.ImageProxy(image_stub, context,
+                                           store_api, self.store_utils)
+        image.set_data(iter(['YYYY']), 4)
+        self.assertEqual(4, image.size)
+        # NOTE(markwash): FakeStore returns image_id for location
+        self.assertEqual(UUID2, image.locations[0]['url'])
+        self.assertEqual('Z', image.checksum)
+        self.assertEqual('active', image.status)
+        self.assertEqual(123, image.virtual_size)
+        # If the image already had virtual_size set (i.e. we're setting
+        # a new location), we should not re-calculate the value.
+        mock_gi.assert_not_called()
 
     def test_image_set_data_location_metadata(self):
         context = glance.context.RequestContext(user=USER1)
@@ -281,6 +329,7 @@
     def test_image_set_data_unknown_size(self):
         context = glance.context.RequestContext(user=USER1)
         image_stub = ImageStub(UUID2, status='queued', locations=[])
+        image_stub.disk_format = 'iso'
        image = glance.location.ImageProxy(image_stub, context,
                                            self.store_api, self.store_utils)
         image.set_data('YYYY', None)
@@ -312,7 +361,7 @@
                                            self.store_api, self.store_utils)
         image.set_data('YYYY', 4)
         self.assertEqual('active', image.status)
-        mock_log.info.assert_called_once_with(
+        mock_log.info.assert_any_call(
             u'Successfully verified signature for image %s',
             UUID2)
 
diff --git a/glance/tests/unit/utils.py b/glance/tests/unit/utils.py
index 2940b24f1b..a843a07e01 100644
--- a/glance/tests/unit/utils.py
+++ b/glance/tests/unit/utils.py
@@ -270,6 +270,20 @@
         pass
 
 
+class FakeStoreAPIReader(FakeStoreAPI):
+    """A store API that actually reads from the data pipe."""
+
+    def add_to_backend_with_multihash(self, conf, image_id, data, size,
+                                      hashing_algo, scheme=None, context=None,
+                                      verifier=None):
+        for chunk in data:
+            pass
+
+        return super(FakeStoreAPIReader, self).add_to_backend_with_multihash(
+            conf, image_id, data, size, hashing_algo,
+            scheme=scheme, context=context, verifier=verifier)
+
+
 class FakePolicyEnforcer(object):
     def __init__(self, *_args, **kwargs):
         self.rules = {}
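Illustrative sketch (not part of the patch): the in-flight inspection that
set_data() enables above can be pictured with a small standalone example. The
wrapper below mimics the role of format_inspector.InfoWrapper by handing every
chunk to an inspector while the consumer (the store) drains the stream, so
format_match and virtual_size can be read once the upload finishes. Only the
get_inspector()/InfoWrapper() usage and the format_match/virtual_size
attributes are taken from the diff; SimpleQcowInspector, PassthroughInfoWrapper,
eat_chunk(), consume_like_a_store(), the 'disk.qcow2' file name and the 64 KiB
chunk size are hypothetical simplifications, not Glance code.

    # Illustrative sketch only -- not the real glance.common.format_inspector.
    import struct


    class SimpleQcowInspector(object):
        """Hypothetical inspector: parses a qcow2 header from the first bytes."""

        def __init__(self):
            self.format_match = False
            self.virtual_size = 0
            self._header = b''

        def eat_chunk(self, chunk):
            # Only the first 32 bytes (magic through the size field) matter.
            if len(self._header) >= 32:
                return
            self._header += chunk
            if len(self._header) >= 32:
                magic, _ver, _bf_off, _bf_sz, _cbits, size = struct.unpack(
                    '>4sIQIIQ', self._header[:32])
                self.format_match = (magic == b'QFI\xfb')
                self.virtual_size = size


    class PassthroughInfoWrapper(object):
        """Hypothetical stand-in for format_inspector.InfoWrapper."""

        def __init__(self, source, inspector):
            self._source = source
            self._inspector = inspector

        def __iter__(self):
            # Feed the inspector as a side effect; the consumer sees the
            # stream unchanged.
            for chunk in self._source:
                self._inspector.eat_chunk(chunk)
                yield chunk


    def consume_like_a_store(chunks):
        """Drain the stream the way a store backend would; count the bytes."""
        return sum(len(chunk) for chunk in chunks)


    if __name__ == '__main__':
        fmt = SimpleQcowInspector()
        with open('disk.qcow2', 'rb') as f:   # hypothetical local image file
            chunks = iter(lambda: f.read(64 * 1024), b'')
            stored = consume_like_a_store(PassthroughInfoWrapper(chunks, fmt))
        if fmt.format_match and fmt.virtual_size:
            print('virtual_size=%d bytes, stored=%d bytes'
                  % (fmt.virtual_size, stored))
        else:
            print('format did not match; virtual size unknown')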