From a76eefed62db96fe51ef40e3209c187af3eb9834 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer
Date: Thu, 17 Jan 2019 17:41:59 +0100
Subject: [PATCH] Add functional test for libvirt vgpu reshape

The added functional test covers the reshape code in the libvirt driver
for VGPU and all the higher level code interacting with this reshape
process. It verifies the following sequence:

* boot two instances with VGPU resource request on an old compute where
  the VGPU resources are provided by the compute RP
* trigger a reshape and verify that the RP tree and the instance
  allocations are reshaped properly
* boot another instance against the new tree and verify its allocation

Part of blueprint reshape-provider-tree

Co-Authored-By: Sylvain Bauza
Change-Id: Ide797ebf7790d69042ae275ebec6ced3fa4787b6
---
 nova/tests/functional/libvirt/base.py         |   9 +-
 nova/tests/functional/libvirt/test_reshape.py | 200 ++++++++++++++++++
 nova/tests/unit/virt/libvirt/fakelibvirt.py   | 102 ++++++++-
 3 files changed, 305 insertions(+), 6 deletions(-)
 create mode 100644 nova/tests/functional/libvirt/test_reshape.py

diff --git a/nova/tests/functional/libvirt/base.py b/nova/tests/functional/libvirt/base.py
index e3a2893541fb..d8b9e184ce94 100644
--- a/nova/tests/functional/libvirt/base.py
+++ b/nova/tests/functional/libvirt/base.py
@@ -57,11 +57,14 @@ class ServersTestBase(base.ServersTestBase):
         # service in the test
         self.flags(compute_driver='libvirt.LibvirtDriver')
 
-    def _get_connection(self, host_info, pci_info=None):
+    def _get_connection(self, host_info, pci_info=None,
+                        libvirt_version=fakelibvirt.FAKE_LIBVIRT_VERSION,
+                        mdev_info=None):
         fake_connection = fakelibvirt.Connection(
             'qemu:///system',
-            version=fakelibvirt.FAKE_LIBVIRT_VERSION,
+            version=libvirt_version,
             hv_version=fakelibvirt.FAKE_QEMU_VERSION,
             host_info=host_info,
-            pci_info=pci_info)
+            pci_info=pci_info,
+            mdev_info=mdev_info)
         return fake_connection

diff --git a/nova/tests/functional/libvirt/test_reshape.py b/nova/tests/functional/libvirt/test_reshape.py
new file mode 100644
index 000000000000..a9bdb0d5eeb2
--- /dev/null
+++ b/nova/tests/functional/libvirt/test_reshape.py
@@ -0,0 +1,200 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import time
+
+import mock
+from oslo_config import cfg
+from oslo_log import log as logging
+
+from nova.tests.functional.libvirt import base
+from nova.tests.unit.virt.libvirt import fakelibvirt
+
+CONF = cfg.CONF
+LOG = logging.getLogger(__name__)
+
+
+class VGPUReshapeTests(base.ServersTestBase):
+    # the minimum libvirt version needed for vgpu
+    MIN_LIBVIRT_MDEV_SUPPORT = 3004000
+
+    def _wait_for_state_change(self, server, expected_status):
+        for i in range(0, 50):
+            server = self.api.get_server(server['id'])
+            if server['status'] == expected_status:
+                return server
+            time.sleep(.1)
+
+        self.assertEqual(expected_status, server['status'])
+        return server
+
+    def test_create_servers_with_vgpu(self):
+        """Verify that vgpu reshape works with the libvirt driver
+
+        1) create two servers with an old tree where the VGPU resource is on
+           the compute provider
+        2) trigger a reshape
+        3) check that the allocations of the servers are still valid
+        4) create another server now against the new tree
+        """
+
+        # NOTE(gibi): We cannot simply ask the virt driver to create an old
+        # RP tree with VGPU on the root RP as that code path does not exist
+        # any more. So we have to hack a bit. We will create a compute
+        # service without VGPU support to have the compute RP ready, then we
+        # manually add the VGPU resources to that RP in placement. Also we
+        # make sure that during the instance claim the virt driver does not
+        # detect the old tree, as that would be a bad time for a reshape.
+        # Later, when the compute service is restarted, the driver will do
+        # the reshape.
+
+        fake_connection = self._get_connection(
+            # We need more RAM or the 3rd server won't be created
+            host_info=fakelibvirt.HostInfo(kB_mem=8192),
+            libvirt_version=self.MIN_LIBVIRT_MDEV_SUPPORT,
+            mdev_info=fakelibvirt.HostMdevDevicesInfo())
+        self.mock_conn.return_value = fake_connection
+
+        # start a compute with vgpu support disabled so the driver will
+        # ignore the content of the above HostMdevDevicesInfo
+        self.flags(enabled_vgpu_types='', group='devices')
+        self.compute = self.start_service('compute', host='compute1')
+
+        # create the VGPU resource in placement manually
+        compute_rp_uuid = self.placement_api.get(
+            '/resource_providers?name=compute1').body[
+            'resource_providers'][0]['uuid']
+        inventories = self.placement_api.get(
+            '/resource_providers/%s/inventories' % compute_rp_uuid).body
+        inventories['inventories']['VGPU'] = {
+            'allocation_ratio': 1.0,
+            'max_unit': 3,
+            'min_unit': 1,
+            'reserved': 0,
+            'step_size': 1,
+            'total': 3}
+        self.placement_api.put(
+            '/resource_providers/%s/inventories' % compute_rp_uuid,
+            inventories)
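+
+        # At this point the provider tree mimics the old, flat layout; a
+        # sketch, not asserted verbatim by this test:
+        #
+        #     compute1 (root RP): VCPU, MEMORY_MB, DISK_GB, VGPU (total=3)
+        #
+        # After the reshape, the VGPU inventory is expected to move to one
+        # child RP per pGPU while the other resources stay on the root RP.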
+
+        # now we boot two servers with vgpu
+        extra_spec = {"resources:VGPU": 1}
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+
+        server_req = self._build_server(flavor_id)
+
+        # NOTE(gibi): during instance_claim() there is a
+        # driver.update_provider_tree() call that would detect the old tree
+        # and would fail as this is not a good time to reshape. To avoid
+        # that, we temporarily mock update_provider_tree here.
+        with mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
+                        'update_provider_tree'):
+            created_server1 = self.api.post_server({'server': server_req})
+            server1 = self._wait_for_state_change(created_server1, 'ACTIVE')
+            created_server2 = self.api.post_server({'server': server_req})
+            server2 = self._wait_for_state_change(created_server2, 'ACTIVE')
+
+        # verify that the inventory, usages and allocation are correct before
+        # the reshape
+        compute_inventory = self.placement_api.get(
+            '/resource_providers/%s/inventories' % compute_rp_uuid).body[
+            'inventories']
+        self.assertEqual(3, compute_inventory['VGPU']['total'])
+        compute_usages = self.placement_api.get(
+            '/resource_providers/%s/usages' % compute_rp_uuid).body[
+            'usages']
+        self.assertEqual(2, compute_usages['VGPU'])
+
+        for server in (server1, server2):
+            allocations = self.placement_api.get(
+                '/allocations/%s' % server['id']).body['allocations']
+            # the flavor has disk=10 and ephemeral=10
+            self.assertEqual(
+                {'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2, 'VGPU': 1},
+                allocations[compute_rp_uuid]['resources'])
+
+        # enable vgpu support
+        self.flags(
+            enabled_vgpu_types=fakelibvirt.NVIDIA_11_VGPU_TYPE,
+            group='devices')
+        # restart the compute service, which will trigger a reshape
+        self.restart_compute_service(self.compute)
+
+        # verify that the inventory, usages and allocation are correct after
+        # the reshape
+        compute_inventory = self.placement_api.get(
+            '/resource_providers/%s/inventories' % compute_rp_uuid).body[
+            'inventories']
+        self.assertNotIn('VGPU', compute_inventory)
+
+        # NOTE(sbauza): The two instances will use two different pGPUs.
+        # That said, we need to check all the pGPU inventories to know
+        # which ones are used.
+        usages = {}
+        for pci_device in [fakelibvirt.PGPU1_PCI_ADDR,
+                           fakelibvirt.PGPU2_PCI_ADDR,
+                           fakelibvirt.PGPU3_PCI_ADDR]:
+            gpu_rp_uuid = self.placement_api.get(
+                '/resource_providers?name=compute1_%s' % pci_device).body[
+                'resource_providers'][0]['uuid']
+            gpu_inventory = self.placement_api.get(
+                '/resource_providers/%s/inventories' % gpu_rp_uuid).body[
+                'inventories']
+            self.assertEqual(1, gpu_inventory['VGPU']['total'])
+
+            gpu_usages = self.placement_api.get(
+                '/resource_providers/%s/usages' % gpu_rp_uuid).body[
+                'usages']
+            usages[pci_device] = gpu_usages['VGPU']
+        # Make sure that both instances are using different pGPUs
+        used_devices = [dev for dev, usage in usages.items() if usage == 1]
+        avail_devices = list(set(usages.keys()) - set(used_devices))
+        self.assertEqual(2, len(used_devices))
+
+        for server in [server1, server2]:
+            allocations = self.placement_api.get(
+                '/allocations/%s' % server['id']).body[
+                'allocations']
+            self.assertEqual(
+                {'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2},
+                allocations[compute_rp_uuid]['resources'])
+            rp_uuids = list(allocations.keys())
+            # We only have two RPs, the compute RP (the root) and the child
+            # pGPU RP
+            gpu_rp_uuid = (rp_uuids[1] if rp_uuids[0] == compute_rp_uuid
+                           else rp_uuids[0])
+            self.assertEqual(
+                {'VGPU': 1},
+                allocations[gpu_rp_uuid]['resources'])
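+
+        # To summarize the reshape from a server's point of view, each
+        # allocation changed from a single provider (a sketch based on the
+        # assertions above):
+        #   {compute RP: {DISK_GB: 20, MEMORY_MB: 2048, VCPU: 2, VGPU: 1}}
+        # to two providers, with the VGPU moved to the child pGPU RP:
+        #   {compute RP: {DISK_GB: 20, MEMORY_MB: 2048, VCPU: 2},
+        #    pGPU RP: {VGPU: 1}}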
+
+        # now create one more instance with vgpu against the reshaped tree
+        created_server = self.api.post_server({'server': server_req})
+        server3 = self._wait_for_state_change(created_server, 'ACTIVE')
+
+        # find the pGPU that wasn't used before we created the third
+        # instance. The third instance should have taken the previously
+        # available pGPU.
+        device = avail_devices[0]
+        gpu_rp_uuid = self.placement_api.get(
+            '/resource_providers?name=compute1_%s' % device).body[
+            'resource_providers'][0]['uuid']
+        gpu_usages = self.placement_api.get(
+            '/resource_providers/%s/usages' % gpu_rp_uuid).body[
+            'usages']
+        self.assertEqual(1, gpu_usages['VGPU'])
+
+        allocations = self.placement_api.get(
+            '/allocations/%s' % server3['id']).body[
+            'allocations']
+        self.assertEqual(
+            {'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2},
+            allocations[compute_rp_uuid]['resources'])
+        self.assertEqual(
+            {'VGPU': 1},
+            allocations[gpu_rp_uuid]['resources'])
diff --git a/nova/tests/unit/virt/libvirt/fakelibvirt.py b/nova/tests/unit/virt/libvirt/fakelibvirt.py
index dbd5a84e4e5c..c9d26d0b7c7e 100644
--- a/nova/tests/unit/virt/libvirt/fakelibvirt.py
+++ b/nova/tests/unit/virt/libvirt/fakelibvirt.py
@@ -172,6 +172,11 @@ VF_DRIVER_NAME = 'ixgbevf'
 VF_SLOT = '10'
 PF_SLOT = '00'
 
+NVIDIA_11_VGPU_TYPE = 'nvidia-11'
+PGPU1_PCI_ADDR = 'pci_0000_06_00_0'
+PGPU2_PCI_ADDR = 'pci_0000_07_00_0'
+PGPU3_PCI_ADDR = 'pci_0000_08_00_0'
+
 
 class FakePciDevice(object):
     pci_dev_template = """
@@ -293,6 +298,57 @@ class HostPciSRIOVDevicesInfo(object):
         return pci_dev
 
 
+class FakeMdevDevice(object):
+    template = """
+    <device>
+      <name>%(dev_name)s</name>
+      <path>/sys/devices/pci0000:00/0000:00:02.0/%(path)s</path>
+      <parent>%(parent)s</parent>
+      <driver>
+        <name>vfio_mdev</name>
+      </driver>
+      <capability type='mdev'>
+        <type id='%(type_id)s'/>
+        <iommuGroup number='12'/>
+      </capability>
+    </device>"""
+
+    def __init__(self, dev_name, type_id, parent):
+        self.xml = self.template % {
+            'dev_name': dev_name, 'type_id': type_id,
+            'path': dev_name[len('mdev_'):],
+            'parent': parent}
+
+    def XMLDesc(self, flags):
+        return self.xml
+
+
+class HostMdevDevicesInfo(object):
+    def __init__(self):
+        self.devices = {
+            'mdev_4b20d080_1b54_4048_85b3_a6a62d165c01':
+                FakeMdevDevice(
+                    dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c01',
+                    type_id=NVIDIA_11_VGPU_TYPE, parent=PGPU1_PCI_ADDR),
+            'mdev_4b20d080_1b54_4048_85b3_a6a62d165c02':
+                FakeMdevDevice(
+                    dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c02',
+                    type_id=NVIDIA_11_VGPU_TYPE, parent=PGPU2_PCI_ADDR),
+            'mdev_4b20d080_1b54_4048_85b3_a6a62d165c03':
+                FakeMdevDevice(
+                    dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c03',
+                    type_id=NVIDIA_11_VGPU_TYPE, parent=PGPU3_PCI_ADDR),
+        }
+
+    def get_all_devices(self):
+        return self.devices.keys()
+
+    def get_device_by_name(self, device_name):
+        dev = self.devices[device_name]
+        return dev
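+
+
+# NOTE: a minimal sketch of how the fakes above are meant to be consumed
+# through the Connection class below; the calls exist in this module, but
+# the exact flow is an illustration, not something the tests assert:
+#
+#   conn = Connection('qemu:///system', mdev_info=HostMdevDevicesInfo())
+#   for name in conn.listDevices('mdev', 0):
+#       dev = conn.nodeDeviceLookupByName(name)
+#       xml = dev.XMLDesc(0)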
 
 
 class HostInfo(object):
     def __init__(self, arch=obj_fields.Architecture.X86_64, kB_mem=4096,
@@ -705,6 +761,20 @@ class Domain(object):
 
         devices['nics'] = nics_info
 
+        hostdev_info = []
+        hostdevs = device_nodes.findall('./hostdev')
+        for hostdev in hostdevs:
+            address = hostdev.find('./source/address')
+            # NOTE(gibi): only handle mdevs as pci is complicated
+            dev_type = hostdev.get('type')
+            if dev_type == 'mdev':
+                hostdev_info.append({
+                    'type': dev_type,
+                    'model': hostdev.get('model'),
+                    'address_uuid': address.get('uuid')
+                })
+        devices['hostdevs'] = hostdev_info
+
         definition['devices'] = devices
 
         return definition
@@ -844,6 +914,15 @@ class Domain(object):
                     function='0x0'/>
       </interface>''' % nic
 
+        hostdevs = ''
+        for hostdev in self._def['devices']['hostdevs']:
+            hostdevs += '''
+    <hostdev mode='subsystem' type='%(type)s' model='%(model)s'>
+      <source>
+        <address uuid='%(address_uuid)s'/>
+      </source>
+    </hostdev>
+            ''' % hostdev
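+        # For illustration, one rendered element would look like this,
+        # assuming the driver passes type='mdev', model='vfio-pci' and the
+        # UUID of one of the fake mdevs above:
+        #
+        #   <hostdev mode='subsystem' type='mdev' model='vfio-pci'>
+        #     <source>
+        #       <address uuid='4b20d080-1b54-4048-85b3-a6a62d165c01'/>
+        #     </source>
+        #   </hostdev>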
+
         return '''<domain type='kvm'>
   <name>%(name)s</name>
   <uuid>%(uuid)s</uuid>
@@ -899,6 +978,7 @@ class Domain(object):
       </video>
+    %(hostdevs)s
       </devices>
 </domain>''' % {'name': self._def['name'],
                 'uuid': self._def['uuid'],
@@ -906,7 +986,8 @@ class Domain(object):
                 'vcpu': self._def['vcpu'],
                 'arch': self._def['os']['arch'],
                 'disks': disks,
-                'nics': nics}
+                'nics': nics,
+                'hostdevs': hostdevs}
 
     def managedSave(self, flags):
         self._connection._mark_not_running(self)
@@ -995,7 +1076,8 @@ class DomainSnapshot(object):
 
 class Connection(object):
     def __init__(self, uri=None, readonly=False, version=FAKE_LIBVIRT_VERSION,
-                 hv_version=FAKE_QEMU_VERSION, host_info=None, pci_info=None):
+                 hv_version=FAKE_QEMU_VERSION, host_info=None, pci_info=None,
+                 mdev_info=None):
         if not uri or uri == '':
             if allow_default_uri_connection:
                 uri = 'qemu:///session'
@@ -1031,6 +1113,7 @@ class Connection(object):
         self.host_info = host_info or HostInfo()
         self.pci_info = pci_info or HostPciSRIOVDevicesInfo(num_pfs=0,
                                                             num_vfs=0)
+        self.mdev_info = mdev_info or []
 
     def _add_filter(self, nwfilter):
         self._nwfilters[nwfilter._name] = nwfilter
@@ -1439,6 +1522,9 @@ class Connection(object):
         return self.pci_info.get_device_by_name(dev_name)
 
     def nodeDeviceLookupByName(self, name):
+        if name.startswith('mdev'):
+            return self.mdev_info.get_device_by_name(name)
+
         pci_dev = self.pci_info.get_device_by_name(name)
         if pci_dev:
             return pci_dev
@@ -1452,7 +1538,17 @@ class Connection(object):
                        error_domain=VIR_FROM_NODEDEV)
 
     def listDevices(self, cap, flags):
-        return self.pci_info.get_all_devices()
+        if cap == 'pci':
+            return self.pci_info.get_all_devices()
+        if cap == 'mdev':
+            return self.mdev_info.get_all_devices()
+        if cap == 'mdev_types':
+            # TODO(gibi): We should return something like
+            # https://libvirt.org/drvnodedev.html#MDEVCap but I tried and it
+            # did not work for me.
+            return None
+        else:
+            raise ValueError('Capability "%s" is not supported' % cap)
 
     def baselineCPU(self, cpu, flag):
         """Add new libvirt API."""