Add compute restart capability for libvirt func tests

The existing generic restart_compute_service() call in the nova test
base class is not appropriate for the libvirt functional test that needs
to reconfigure the libvirt connection as it is not aware of the libvirt
specific mocking needed when a compute service is started.

So this patch adds a specific restart_compute_service() call
to nova.tests.functional.libvirt.base.ServersTestBase. This will be used
by a later patch testing [pci]device_spec reconfiguration scenarios.

This change showed that some of the existing libvirt functional tests
used the incomplete restart_compute_service from the base class. Others
used local mocking to inject new pci config to the restart. I moved all
these to the new function and removed the local mocking.

Change-Id: Ic717dc42ac6b6cace59d344acaf12f9d1ee35564
This commit is contained in:
Balazs Gibizer 2022-07-20 12:03:45 +02:00
parent ebae3c2081
commit 57c253a609
7 changed files with 127 additions and 54 deletions

View File

@ -114,7 +114,7 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase):
def start_compute(
self, hostname='compute1', host_info=None, pci_info=None,
mdev_info=None, vdpa_info=None, libvirt_version=None,
qemu_version=None,
qemu_version=None, cell_name=None, connection=None
):
"""Start a compute service.
@ -124,16 +124,35 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase):
:param host_info: A fakelibvirt.HostInfo object for the host. Defaults
to a HostInfo with 2 NUMA nodes, 2 cores per node, 2 threads per
core, and 16GB of RAM.
:param connection: A fake libvirt connection. You should not provide it
directly. However it is used by restart_compute_service to
implement restart without losing the hypervisor state.
:returns: The hostname of the created service, which can be used to
lookup the created service and UUID of the associated resource
provider.
"""
if connection and (
host_info or
pci_info or
mdev_info or
vdpa_info or
libvirt_version or
qemu_version
):
raise ValueError(
"Either an existing connection instance can be provided or a "
"list of parameters for a new connection"
)
def _start_compute(hostname, host_info):
fake_connection = self._get_connection(
host_info, pci_info, mdev_info, vdpa_info, libvirt_version,
qemu_version, hostname,
)
if connection:
fake_connection = connection
else:
fake_connection = self._get_connection(
host_info, pci_info, mdev_info, vdpa_info, libvirt_version,
qemu_version, hostname,
)
# If the compute is configured with PCI devices then we need to
# make sure that the stubs around sysfs has the MAC address
# information for the PCI PF devices
@ -144,7 +163,8 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase):
# actually start the service.
orig_con = self.mock_conn.return_value
self.mock_conn.return_value = fake_connection
compute = self.start_service('compute', host=hostname)
compute = self.start_service(
'compute', host=hostname, cell_name=cell_name)
# Once that's done, we need to tweak the compute "service" to
# make sure it returns unique objects.
compute.driver._host.get_connection = lambda: fake_connection
@ -165,6 +185,74 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase):
return hostname
def restart_compute_service(
    self,
    hostname,
    host_info=None,
    pci_info=None,
    mdev_info=None,
    vdpa_info=None,
    libvirt_version=None,
    qemu_version=None,
    keep_hypervisor_state=True,
):
    """Stops the service and starts a new one to have a realistic restart

    :param hostname: the hostname of the nova-compute service to be
        restarted
    :param host_info: A fakelibvirt.HostInfo object for the restarted
        host. Only allowed when keep_hypervisor_state is False.
    :param pci_info: A fakelibvirt.HostPCIDevicesInfo object for the
        restarted host. Only allowed when keep_hypervisor_state is False.
    :param mdev_info: A fakelibvirt.HostMdevDevicesInfo object for the
        restarted host. Only allowed when keep_hypervisor_state is False.
    :param vdpa_info: A fakelibvirt.HostVDPADevicesInfo object for the
        restarted host. Only allowed when keep_hypervisor_state is False.
    :param libvirt_version: The libvirt version to report after the
        restart. Only allowed when keep_hypervisor_state is False.
    :param qemu_version: The qemu version to report after the restart.
        Only allowed when keep_hypervisor_state is False.
    :param keep_hypervisor_state: If True then we reuse the fake connection
        from the existing driver. If False a new connection will be created
        based on the other parameters provided
    :raises ValueError: if keep_hypervisor_state is True but new libvirt
        connection parameters were also provided
    :returns: the restarted compute service
    """
    # We are intentionally not calling super() here. Nova's base test class
    # defines starting and restarting compute services with very different
    # signatures and those calls cannot be made aware of the intricacies
    # of the libvirt fixture. So we simply hide that implementation.
    # Reusing the old connection and reconfiguring it via new parameters
    # are mutually exclusive: the parameters would be silently ignored.
    if keep_hypervisor_state and (
        host_info or
        pci_info or
        mdev_info or
        vdpa_info or
        libvirt_version or
        qemu_version
    ):
        raise ValueError(
            "Either keep_hypervisor_state=True or a list of libvirt "
            "parameters can be provided but not both"
        )
    # Forget the old service object; start_compute() below will register
    # a fresh one under the same hostname.
    compute = self.computes.pop(hostname)
    self.compute_rp_uuids.pop(hostname)
    # NOTE(gibi): The service interface cannot be used to simulate a real
    # service restart as the manager object will not be recreated after a
    # service.stop() and service.start() therefore the manager state will
    # survive. For example the resource tracker will not be recreated after
    # a stop start. The service.kill() call cannot help as it deletes
    # the service from the DB which is unrealistic and causes that some
    # operation that refers to the killed host (e.g. evacuate) fails.
    # So this helper method will stop the original service and then start
    # a brand new compute service for the same host and node. This way
    # a new ComputeManager instance will be created and initialized during
    # the service startup.
    compute.stop()
    # this service was running previously, so we have to make sure that
    # we restart it in the same cell
    cell_name = self.host_mappings[compute.host].cell_mapping.name
    # Grab the fake libvirt connection before the driver goes away so the
    # simulated hypervisor state (domains, devices) can survive the restart.
    old_connection = compute.manager.driver._get_connection()
    self.start_compute(
        hostname, host_info, pci_info, mdev_info, vdpa_info,
        libvirt_version, qemu_version, cell_name,
        old_connection if keep_hypervisor_state else None
    )
    return self.computes[hostname]
class LibvirtMigrationMixin(object):
"""A simple mixin to facilliate successful libvirt live migrations

View File

@ -51,7 +51,7 @@ class LibvirtDeviceBusMigration(base.ServersTestBase):
def _assert_stashed_image_properties_persist(self, server, properties):
# Assert the stashed properties persist across a host reboot
self.restart_compute_service(self.compute)
self.restart_compute_service(self.compute_hostname)
self._assert_stashed_image_properties(server['id'], properties)
# Assert the stashed properties persist across a guest reboot
@ -173,7 +173,7 @@ class LibvirtDeviceBusMigration(base.ServersTestBase):
self.flags(pointer_model='ps2mouse')
# Restart compute to pick up ps2 setting, which means the guest will
# not get a prescribed pointer device
self.restart_compute_service(self.compute)
self.restart_compute_service(self.compute_hostname)
# Create a server with default image properties
default_image_properties1 = {
@ -187,7 +187,7 @@ class LibvirtDeviceBusMigration(base.ServersTestBase):
# Assert the defaults persist across a host flag change
self.flags(pointer_model='usbtablet')
# Restart compute to pick up usb setting
self.restart_compute_service(self.compute)
self.restart_compute_service(self.compute_hostname)
self._assert_stashed_image_properties(
server1['id'], default_image_properties1)
@ -216,7 +216,7 @@ class LibvirtDeviceBusMigration(base.ServersTestBase):
# https://bugs.launchpad.net/nova/+bug/1866106
self.flags(pointer_model=None)
# Restart compute to pick up None setting
self.restart_compute_service(self.compute)
self.restart_compute_service(self.compute_hostname)
self._assert_stashed_image_properties(
server1['id'], default_image_properties1)
self._assert_stashed_image_properties(

View File

@ -206,10 +206,8 @@ class NUMALiveMigrationPositiveTests(NUMALiveMigrationPositiveBase):
# Increase cpu_dedicated_set to 0-3, expecting the live migrated server
# to end up on 2,3.
self.flags(cpu_dedicated_set='0-3', group='compute')
self.computes['host_a'] = self.restart_compute_service(
self.computes['host_a'])
self.computes['host_b'] = self.restart_compute_service(
self.computes['host_b'])
self.restart_compute_service('host_a')
self.restart_compute_service('host_b')
# Live migrate, RPC-pinning the destination host if asked
if pin_dest:
@ -333,10 +331,8 @@ class NUMALiveMigrationRollbackTests(NUMALiveMigrationPositiveBase):
# Increase cpu_dedicated_set to 0-3, expecting the live migrated server
# to end up on 2,3.
self.flags(cpu_dedicated_set='0-3', group='compute')
self.computes['host_a'] = self.restart_compute_service(
self.computes['host_a'])
self.computes['host_b'] = self.restart_compute_service(
self.computes['host_b'])
self.restart_compute_service('host_a')
self.restart_compute_service('host_b')
# Live migrate, RPC-pinning the destination host if asked. This is a
# rollback test, so server_a is expected to remain on host_a.

View File

@ -1187,10 +1187,8 @@ class ReshapeForPCPUsTest(NUMAServersTestBase):
self.flags(cpu_dedicated_set='0-7', group='compute')
self.flags(vcpu_pin_set=None)
computes = {}
for host, compute in self.computes.items():
computes[host] = self.restart_compute_service(compute)
self.computes = computes
for host in list(self.computes.keys()):
self.restart_compute_service(host)
# verify that the inventory, usages and allocation are correct after
# the reshape

View File

@ -908,11 +908,8 @@ class SRIOVServersTest(_PCIServersWithMigrationTestBase):
# Disable SRIOV capabilities in PF and delete the VFs
self._disable_sriov_in_pf(pci_info_no_sriov)
fake_connection = self._get_connection(pci_info=pci_info_no_sriov,
hostname='test_compute0')
self.mock_conn.return_value = fake_connection
self.compute = self.start_service('compute', host='test_compute0')
self.start_compute('test_compute0', pci_info=pci_info_no_sriov)
self.compute = self.computes['test_compute0']
ctxt = context.get_admin_context()
pci_devices = objects.PciDeviceList.get_by_compute_node(
@ -924,13 +921,9 @@ class SRIOVServersTest(_PCIServersWithMigrationTestBase):
self.assertEqual(1, len(pci_devices))
self.assertEqual('type-PCI', pci_devices[0].dev_type)
# Update connection with original pci info with sriov PFs
fake_connection = self._get_connection(pci_info=pci_info,
hostname='test_compute0')
self.mock_conn.return_value = fake_connection
# Restart the compute service
self.restart_compute_service(self.compute)
# Restart the compute service with sriov PFs
self.restart_compute_service(
self.compute.host, pci_info=pci_info, keep_hypervisor_state=False)
# Verify if PCI devices are of type type-PF or type-VF
pci_devices = objects.PciDeviceList.get_by_compute_node(
@ -1015,10 +1008,9 @@ class SRIOVAttachDetachTest(_PCIServersTestBase):
host_info = fakelibvirt.HostInfo(cpu_nodes=2, cpu_sockets=1,
cpu_cores=2, cpu_threads=2)
pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1)
fake_connection = self._get_connection(host_info, pci_info)
self.mock_conn.return_value = fake_connection
self.compute = self.start_service('compute', host='test_compute0')
self.start_compute(
'test_compute0', host_info=host_info, pci_info=pci_info)
self.compute = self.computes['test_compute0']
# Create server with a port
server = self._create_server(networks=[{'port': first_port_id}])

View File

@ -72,11 +72,11 @@ class VGPUReshapeTests(base.ServersTestBase):
# ignore the content of the above HostMdevDeviceInfo
self.flags(enabled_mdev_types='', group='devices')
hostname = self.start_compute(
self.hostname = self.start_compute(
hostname='compute1',
mdev_info=fakelibvirt.HostMdevDevicesInfo(devices=mdevs),
)
self.compute = self.computes[hostname]
self.compute = self.computes[self.hostname]
# create the VGPU resource in placement manually
compute_rp_uuid = self.placement.get(
@ -158,7 +158,7 @@ class VGPUReshapeTests(base.ServersTestBase):
allocations[compute_rp_uuid]['resources'])
# restart compute which will trigger a reshape
self.compute = self.restart_compute_service(self.compute)
self.compute = self.restart_compute_service(self.hostname)
# verify that the inventory, usages and allocation are correct after
# the reshape

View File

@ -113,8 +113,8 @@ class VGPUTestBase(base.ServersTestBase):
parent=libvirt_parent)})
return uuid
def start_compute(self, hostname):
hostname = super().start_compute(
def start_compute_with_vgpu(self, hostname):
hostname = self.start_compute(
pci_info=fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
),
@ -197,7 +197,7 @@ class VGPUTests(VGPUTestBase):
enabled_mdev_types=fakelibvirt.NVIDIA_11_VGPU_TYPE,
group='devices')
self.compute1 = self.start_compute('host1')
self.compute1 = self.start_compute_with_vgpu('host1')
def assert_vgpu_usage_for_compute(self, compute, expected):
self.assert_mdev_usage(compute, expected_amount=expected)
@ -211,7 +211,7 @@ class VGPUTests(VGPUTestBase):
def test_resize_servers_with_vgpu(self):
# Add another compute for the sake of resizing
self.compute2 = self.start_compute('host2')
self.compute2 = self.start_compute_with_vgpu('host2')
server = self._create_server(
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
flavor_id=self.flavor, host=self.compute1.host,
@ -337,7 +337,7 @@ class VGPUMultipleTypesTests(VGPUTestBase):
# Prepare traits for later on
self._create_trait('CUSTOM_NVIDIA_11')
self._create_trait('CUSTOM_NVIDIA_12')
self.compute1 = self.start_compute('host1')
self.compute1 = self.start_compute_with_vgpu('host1')
def test_create_servers_with_vgpu(self):
self._create_server(
@ -369,13 +369,12 @@ class VGPUMultipleTypesTests(VGPUTestBase):
def test_create_servers_with_specific_type(self):
# Regenerate the PCI addresses so both pGPUs now support nvidia-12
connection = self.computes[
self.compute1.host].driver._host.get_connection()
connection.pci_info = fakelibvirt.HostPCIDevicesInfo(
pci_info = fakelibvirt.HostPCIDevicesInfo(
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
multiple_gpu_types=True)
# Make a restart to update the Resource Providers
self.compute1 = self.restart_compute_service(self.compute1)
self.compute1 = self.restart_compute_service(
self.compute1.host, pci_info=pci_info, keep_hypervisor_state=False)
pgpu1_rp_uuid = self._get_provider_uuid_by_name(
self.compute1.host + '_' + fakelibvirt.MDEVCAP_DEV1_PCI_ADDR)
pgpu2_rp_uuid = self._get_provider_uuid_by_name(
@ -451,7 +450,7 @@ class DifferentMdevClassesTests(VGPUTestBase):
group='mdev_nvidia-12')
self.flags(mdev_class='CUSTOM_NOTVGPU', group='mdev_mlx5_core')
self.compute1 = self.start_compute('host1')
self.compute1 = self.start_compute_with_vgpu('host1')
# Regenerate the PCI addresses so they can support both mlx5 and
# nvidia-12 types
connection = self.computes[
@ -460,7 +459,7 @@ class DifferentMdevClassesTests(VGPUTestBase):
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
generic_types=True)
# Make a restart to update the Resource Providers
self.compute1 = self.restart_compute_service(self.compute1)
self.compute1 = self.restart_compute_service('host1')
def test_create_servers_with_different_mdev_classes(self):
physdev1_rp_uuid = self._get_provider_uuid_by_name(
@ -498,7 +497,7 @@ class DifferentMdevClassesTests(VGPUTestBase):
def test_resize_servers_with_mlx5(self):
# Add another compute for the sake of resizing
self.compute2 = self.start_compute('host2')
self.compute2 = self.start_compute_with_vgpu('host2')
# Regenerate the PCI addresses so they can support both mlx5 and
# nvidia-12 types
connection = self.computes[
@ -507,7 +506,7 @@ class DifferentMdevClassesTests(VGPUTestBase):
num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2,
generic_types=True)
# Make a restart to update the Resource Providers
self.compute2 = self.restart_compute_service(self.compute2)
self.compute2 = self.restart_compute_service('host2')
# Use the new flavor for booting
server = self._create_server(