Start transition to declarative CPU pinning configs

Add two new jobs: one runs the CPU pinning tests with
[compute]cpu_dedicated_set, the other with the legacy
[DEFAULT]vcpu_pin_set. Start by converting the live migrate and
reboot test to the new way of doing CPU pinning.

Because not all tests are converted at once (the unconverted ones
still change the host CPU pinning configs on the fly), the new jobs
can only run the converted tests.

In the future, once all tests have been converted in subsequent
patches, we will merge the default job and the -cpupinning job,
leaving only the -cpupinninglegacy job to run the CPU pinning tests
that need [DEFAULT]vcpu_pin_set.

Because we no longer always need to pass a target host to the
live_migrate() helper, this patch also makes its target_host
parameter optional: when it is omitted, the helper only asserts that
the server has moved off its original host.
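
For illustration, the two call styles the new signature allows (a
minimal sketch; the ExampleTest class is hypothetical, the helpers
are the existing whitebox ones):

    from whitebox_tempest_plugin.api.compute import base


    class ExampleTest(base.BaseWhiteboxComputeTest):
        """Hypothetical test class, for illustration only."""

        def test_untargeted_migration(self):
            server = self.create_test_server()
            # No target_host: the scheduler picks the destination and
            # live_migrate() asserts the server changed hosts.
            self.live_migrate(server['id'], 'ACTIVE')

        def test_targeted_migration(self):
            server = self.create_test_server()
            dest = self.get_host_other_than(server['id'])
            # Explicit target_host: live_migrate() asserts the server
            # landed on that host.
            self.live_migrate(server['id'], 'ACTIVE', target_host=dest)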

Change-Id: If0ef21f885eef880d55aed7ebed99828fe978d38
Artom Lifshitz 2021-08-20 10:14:33 -04:00
parent 7431d4e962
commit cca33388c2
7 changed files with 122 additions and 156 deletions


@@ -29,7 +29,8 @@
       - compute
 
 - job:
-    name: whitebox-devstack-multinode
+    name: whitebox-devstack-multinode-base
+    abstract: true
     parent: tempest-multinode-full-py3
     nodeset: nested-virt-multinode
     description: |
@@ -79,12 +80,72 @@
         tempest:
           num_hugepages: 512
 
+- job:
+    name: whitebox-devstack-multinode
+    parent: whitebox-devstack-multinode-base
+    vars:
+      # NOTE(artom) We can't have this on the parent job, otherwise the two
+      # -cpupinning jobs will inherit it as well.
+      tempest_exclude_regex: test_live_migrate_and_reboot
+
+- job:
+    name: whitebox-devstack-multinode-cpupinning
+    parent: whitebox-devstack-multinode-base
+    description: |
+      Runs the CPU pinning tests on single-NUMA, non-SMT, nested virt VMs. Uses
+      [compute]cpu_dedicated_set to configure host CPUs for pinning.
+    vars:
+      tempest_test_regex: 'test_live_migrate_and_reboot'
+      devstack_local_conf:
+        post-config:
+          $NOVA_CONF:
+            compute:
+              cpu_dedicated_set: '0-3'
+              cpu_shared_set: '4,5'
+    group-vars:
+      subnode:
+        devstack_local_conf:
+          post-config:
+            $NOVA_CONF:
+              compute:
+                cpu_dedicated_set: '4-7'
+                cpu_shared_set: '2,3'
+
+- job:
+    name: whitebox-devstack-multinode-cpupinninglegacy
+    parent: whitebox-devstack-multinode-base
+    description: |
+      Runs the CPU pinning tests on single-NUMA, non-SMT, nested virt VMs. Uses
+      [DEFAULT]vcpu_pin_set to configure host CPUs for pinning.
+    vars:
+      tempest_test_regex: 'test_live_migrate_and_reboot'
+      devstack_local_conf:
+        post-config:
+          $NOVA_CONF:
+            DEFAULT:
+              vcpu_pin_set: '0-3'
+            compute:
+              cpu_shared_set: '4,5'
+    group-vars:
+      subnode:
+        devstack_local_conf:
+          post-config:
+            $NOVA_CONF:
+              DEFAULT:
+                vcpu_pin_set: '4-7'
+              compute:
+                cpu_shared_set: '2,3'
+
 - project:
     templates:
       - openstack-python3-xena-jobs
     check:
       jobs:
         - whitebox-devstack-multinode
+        - whitebox-devstack-multinode-cpupinning
+        - whitebox-devstack-multinode-cpupinninglegacy
     gate:
       jobs:
         - whitebox-devstack-multinode
+        - whitebox-devstack-multinode-cpupinning
+        - whitebox-devstack-multinode-cpupinninglegacy
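
As a quick illustration of how the regex pair above splits the suite
between the jobs (a standalone sketch: the selection logic is a
simplification of tempest's, and the test IDs are shortened):

    import re

    tests = [
        'LiveMigrationBase.test_live_migrate_and_reboot',
        'LiveMigrationBase.test_volume_backed_live_migration',
    ]

    # The -cpupinning jobs select with tempest_test_regex...
    cpupinning = [t for t in tests
                  if re.search('test_live_migrate_and_reboot', t)]
    # ...while the default job excludes with tempest_exclude_regex.
    default = [t for t in tests
               if not re.search('test_live_migrate_and_reboot', t)]

    # Every test runs in exactly one of the two groups.
    assert set(cpupinning) | set(default) == set(tests)
    assert set(cpupinning).isdisjoint(default)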


@@ -126,21 +126,22 @@ class BaseWhiteboxComputeTest(base.BaseV2ComputeAdminTest):
         xml = virshxml.dumpxml(server_instance_name)
         return ET.fromstring(xml)
 
-    def live_migrate(self, server_id, target_host, state):
-        self.admin_servers_client.live_migrate_server(
-            server_id, host=target_host, block_migration='auto')
+    def live_migrate(self, server_id, state, target_host=None):
+        orig_host = self.get_host_for_server(server_id)
+        self.admin_servers_client.live_migrate_server(server_id,
+                                                      block_migration='auto',
+                                                      host=target_host)
         waiters.wait_for_server_status(self.servers_client, server_id, state)
-        migration_list = (self.admin_migration_client.list_migrations()
-                          ['migrations'])
-        msg = ("Live Migration failed. Migrations list for Instance "
-               "%s: [" % server_id)
-        for live_migration in migration_list:
-            if (live_migration['instance_uuid'] == server_id):
-                msg += "\n%s" % live_migration
-        msg += "]"
-        self.assertEqual(target_host, self.get_host_for_server(server_id),
-                         msg)
+        if target_host:
+            self.assertEqual(
+                target_host, self.get_host_for_server(server_id),
+                'Live migration failed, instance %s is not '
+                'on target host %s' % (server_id, target_host))
+        else:
+            self.assertNotEqual(
+                orig_host, self.get_host_for_server(server_id),
+                'Live migration failed, '
+                'instance %s has not changed hosts' % server_id)
 
     # TODO(lyarwood): Refactor all of this into a common module between
     # tempest.api.{compute,volume} and tempest.scenario.manager where this

@@ -925,7 +925,7 @@ class NUMALiveMigrationTest(NUMALiveMigrationBase):
                 ('DEFAULT', 'vcpu_pin_set',
                  hardware.format_cpu_spec(topo_a[0] + topo_a[1]))
         ):
-            self.live_migrate(server_b['id'], host_a, 'ACTIVE')
+            self.live_migrate(server_b['id'], 'ACTIVE', target_host=host_a)
 
         # They should have disjoint (non-null) CPU pins in their XML
         pin_a = self.get_pinning_as_set(server_a['id'])
@@ -998,7 +998,7 @@ class NUMALiveMigrationTest(NUMALiveMigrationBase):
         # Live migrate server_b
         compute_a = self.get_host_other_than(server_b['id'])
-        self.live_migrate(server_b['id'], compute_a, 'ACTIVE')
+        self.live_migrate(server_b['id'], 'ACTIVE', target_host=compute_a)
 
         # They should have identical (non-null) emulator pins and disjoint
         # (non-null) CPU pins
@@ -1114,7 +1114,7 @@ class NUMALiveMigrationTest(NUMALiveMigrationBase):
         # Live migrate server_b
         compute_a = self.get_host_other_than(server_b['id'])
-        self.live_migrate(server_b['id'], compute_a, 'ACTIVE')
+        self.live_migrate(server_b['id'], 'ACTIVE', target_host=compute_a)
 
         # Assert hugepage XML element is still present and correct size for
         # server_b after live migration
@@ -1249,7 +1249,8 @@ class NUMACPUDedicatedLiveMigrationTest(NUMALiveMigrationBase):
         # Live migrate shared server A to the compute node with shared
         # server B. Both servers are using shared vCPUs, so migration
         # should be successful
-        self.live_migrate(shared_server_a['id'], host2, 'ACTIVE')
+        self.live_migrate(shared_server_a['id'], 'ACTIVE',
+                          target_host=host2)
 
         # Validate shared server A now has a shared cpuset that is equal
         # to its new host's cpu_shared_set
@@ -1265,7 +1266,8 @@
         # Live migrate dedicated server A to the same host holding
         # dedicated server B. The end result should be all 4 servers on
         # the same host.
-        self.live_migrate(dedicated_server_a['id'], host2, 'ACTIVE')
+        self.live_migrate(dedicated_server_a['id'], 'ACTIVE',
+                          target_host=host2)
 
         # Dedicated server A should have a CPU pin set that is a subset of
         # its new host's cpu_dedicated_set and should not intersect with

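The hunks above feed CPU lists through hardware.format_cpu_spec() to
build the config values. As a rough illustration of the spec strings
involved, a hypothetical re-implementation (not the plugin's actual
code):

    def format_cpu_spec(cpus):
        # Collapse a collection of CPU IDs into nova's range syntax,
        # e.g. {0, 1, 2, 5} -> '0-2,5'.
        cpus = sorted(set(cpus))
        parts = []
        while cpus:
            start = prev = cpus.pop(0)
            while cpus and cpus[0] == prev + 1:
                prev = cpus.pop(0)
            parts.append(str(start) if start == prev
                         else '%d-%d' % (start, prev))
        return ','.join(parts)

    assert format_cpu_spec([0, 1, 2, 3]) == '0-3'
    assert format_cpu_spec({4, 5, 7}) == '4-5,7'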

@@ -67,8 +67,7 @@ class FileBackedMemory(base.BaseWhiteboxComputeTest):
         ):
             server = self.create_test_server()
             self._assert_shared_mode_and_file_type(server)
-            destination_host = self.get_host_other_than(server['id'])
-            self.live_migrate(server['id'], destination_host, 'ACTIVE')
+            self.live_migrate(server['id'], 'ACTIVE')
             self._assert_shared_mode_and_file_type(server)
 
     def test_live_migrate_non_file_backed_host_to_file_backed_host(self):


@@ -19,22 +19,21 @@ import testtools
 
 from tempest.common import utils
 from tempest import config
 from tempest.lib import decorators
 
 from whitebox_tempest_plugin.api.compute import base
 from whitebox_tempest_plugin.api.compute import numa_helper
-from whitebox_tempest_plugin import hardware
-from whitebox_tempest_plugin.services import clients
-from whitebox_tempest_plugin import utils as whitebox_utils
 
 CONF = config.CONF
 LOG = logging.getLogger(__name__)
 
 
 # NOTE(mdbooth): This test was originally based on
 # tempest.api.compute.admin.test_live_migration
-class LiveMigrationBase(base.BaseWhiteboxComputeTest):
+class LiveMigrationBase(base.BaseWhiteboxComputeTest,
+                        numa_helper.NUMAHelperMixin):
 
     # First support for block_migration='auto': since Mitaka (OSP9)
     min_microversion = '2.25'
@@ -50,15 +49,9 @@ class LiveMigrationBase(base.BaseWhiteboxComputeTest):
             raise cls.skipException(
                 "Less than 2 compute nodes, skipping migration test.")
 
-
-class LiveMigrationTest(LiveMigrationBase):
-    # First support for block_migration='auto': since Mitaka (OSP9)
-    min_microversion = '2.25'
-
     @testtools.skipUnless(CONF.compute_feature_enabled.
                           volume_backed_live_migration,
                           'Volume-backed live migration not available')
     @decorators.idempotent_id('41e92884-ed04-42da-89fc-ef8922646542')
     @utils.services('volume')
     def test_volume_backed_live_migration(self):
         # Live migrate an instance to another host
@@ -73,126 +66,37 @@
         # The initial value of disk cache depends on config and the storage in
         # use. We can't guess it, so fetch it before we start.
         cache_type = root_disk_cache()
-        source_host = self.get_host_for_server(server_id)
-        destination_host = self.get_host_other_than(server_id)
-
-        LOG.info("Live migrate from source %s to destination %s",
-                 source_host, destination_host)
-        self.live_migrate(server_id, destination_host, 'ACTIVE')
+        self.live_migrate(server_id, 'ACTIVE')
 
         # Assert cache-mode has not changed during live migration
         self.assertEqual(cache_type, root_disk_cache())
 
-
-class LiveMigrationAndReboot(LiveMigrationBase, numa_helper.NUMAHelperMixin):
-
-    dedicated_cpu_policy = {'hw:cpu_policy': 'dedicated'}
-
-    @classmethod
-    def skip_checks(cls):
-        super(LiveMigrationAndReboot, cls).skip_checks()
-        if getattr(CONF.whitebox_hardware, 'cpu_topology', None) is None:
-            msg = "cpu_topology in whitebox-hardware is not present"
-            raise cls.skipException(msg)
-
-    def _migrate_and_reboot_instance(self, section, cpu_set_parameter):
-        flavor_vcpu_size = 2
-        cpu_list = hardware.get_all_cpus()
-        if len(cpu_list) < 4:
-            raise self.skipException('Requires 4 or more pCPUs to execute '
-                                     'the test')
-
-        host1, host2 = self.list_compute_hosts()
-
-        # Create two different cpu dedicated ranges for each host in order
-        # to force different domain XML after instance migration
-        host1_dedicated_set = cpu_list[:2]
-        host2_dedicated_set = cpu_list[2:4]
-
-        dedicated_flavor = self.create_flavor(
-            vcpus=flavor_vcpu_size,
-            extra_specs=self.dedicated_cpu_policy
-        )
-
-        host1_sm = clients.NovaServiceManager(host1, 'nova-compute',
-                                              self.os_admin.services_client)
-        host2_sm = clients.NovaServiceManager(host2, 'nova-compute',
-                                              self.os_admin.services_client)
-
-        with whitebox_utils.multicontext(
-            host1_sm.config_options(
-                (section, cpu_set_parameter,
-                 hardware.format_cpu_spec(host1_dedicated_set))),
-            host2_sm.config_options(
-                (section, cpu_set_parameter,
-                 hardware.format_cpu_spec(host2_dedicated_set)))
-        ):
-            # Create a server with a dedicated cpu policy
-            server = self.create_test_server(
-                flavor=dedicated_flavor['id']
-            )
-
-            # Gather the pinned CPUs for the instance prior to migration
-            pinned_cpus_pre_migration = self.get_pinning_as_set(server['id'])
-
-            # Determine the destination migration host and migrate the server
-            # to that host
-            compute_dest = self.get_host_other_than(server['id'])
-            self.live_migrate(server['id'], compute_dest, 'ACTIVE')
-
-            # After successful migration determine the instance's pinned CPUs
-            pinned_cpus_post_migration = self.get_pinning_as_set(server['id'])
-
-            # Confirm the pCPUs are no longer the same as they were when
-            # on the source compute host
-            self.assertTrue(
-                pinned_cpus_post_migration.isdisjoint(
-                    pinned_cpus_pre_migration),
-                "After migration, server %s's current pinned CPUs %s "
-                "should no longer match the pinned CPUs it had pre-"
-                "migration: %s" % (server['id'], pinned_cpus_post_migration,
-                                   pinned_cpus_pre_migration)
-            )
-
-            # Soft reboot the server
-            # TODO(artom) If the soft reboot fails, the libvirt driver will do
-            # a hard reboot. This is only detectable through log parsing, so to
-            # be 100% sure we got the soft reboot we wanted, we should probably
-            # do that.
-            self.reboot_server(server['id'], type='SOFT')
-
-            # Gather the server's pinned CPUs after the soft reboot
-            pinned_cpus_post_reboot = self.get_pinning_as_set(server['id'])
-
-            # Validate the server's pinned CPUs remain the same after the
-            # reboot
-            self.assertTrue(
-                pinned_cpus_post_migration == pinned_cpus_post_reboot,
-                'After soft rebooting server %s its pinned CPUs should have '
-                'remained the same as %s, but are instead now %s' % (
-                    server['id'], pinned_cpus_post_migration,
-                    pinned_cpus_post_reboot)
-            )
-
-            self.delete_server(server['id'])
-
-
-class VCPUPinSetMigrateAndReboot(LiveMigrationAndReboot):
-
-    max_microversion = '2.79'
-    pin_set_mode = 'vcpu_pin_set'
-    pin_section = 'DEFAULT'
-
-    def test_vcpu_pin_migrate_and_reboot(self):
-        self._migrate_and_reboot_instance(self.pin_section, self.pin_set_mode)
-
-
-class CPUDedicatedMigrateAndReboot(LiveMigrationAndReboot):
-
-    min_microversion = '2.79'
-    max_microversion = 'latest'
-    pin_set_mode = 'cpu_dedicated_set'
-    pin_section = 'compute'
-
-    def test_cpu_dedicated_migrate_and_reboot(self):
-        self._migrate_and_reboot_instance(self.pin_section, self.pin_set_mode)
+    def test_live_migrate_and_reboot(self):
+        """Test for bug 1890501. Assumes that [compute]cpu_dedicated_set
+        (or [DEFAULT]vcpu_pin_set in the legacy case) is different on all
+        compute hosts in the deployment.
+        """
+        flavor = self.create_flavor(
+            extra_specs={'hw:cpu_policy': 'dedicated'})
+        server = self.create_test_server(flavor=flavor['id'])
+        pinned_cpus_pre_migration = self.get_pinning_as_set(server['id'])
+        self.live_migrate(server['id'], 'ACTIVE')
+        pinned_cpus_post_migration = self.get_pinning_as_set(server['id'])
+        self.assertTrue(
+            pinned_cpus_post_migration.isdisjoint(pinned_cpus_pre_migration),
+            "After migration, server %s's current pinned CPUs %s should "
+            "no longer match the pinned CPUs it had pre-migration: %s" % (
+                server['id'], pinned_cpus_post_migration,
+                pinned_cpus_pre_migration))
+        # TODO(artom) If the soft reboot fails, the libvirt driver will do
+        # a hard reboot. This is only detectable through log parsing, so to
+        # be 100% sure we got the soft reboot we wanted, we should probably
+        # do that.
+        self.reboot_server(server['id'], type='SOFT')
+        pinned_cpus_post_reboot = self.get_pinning_as_set(server['id'])
+        self.assertTrue(
+            pinned_cpus_post_migration == pinned_cpus_post_reboot,
+            'After soft rebooting server %s its pinned CPUs should have '
+            'remained the same as %s, but are instead now %s' % (
+                server['id'], pinned_cpus_post_migration,
+                pinned_cpus_post_reboot))
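
The invariant the new test checks is purely set-level: the pinning
must change across the migration (the hosts' dedicated sets are
disjoint) and must not change across the reboot. A standalone
illustration with made-up values mirroring the job config above
(first host pins on 0-3, subnode on 4-7):

    pinned_pre_migration = {0, 1}    # pinned from the source host's set
    pinned_post_migration = {4, 5}   # re-pinned from the destination's set
    pinned_post_reboot = {4, 5}      # the reboot must not re-pin

    # Migration moves the instance to a disjoint dedicated set...
    assert pinned_post_migration.isdisjoint(pinned_pre_migration)
    # ...and a soft reboot leaves the pinning untouched (in bug 1890501
    # the pinning changed after a reboot that followed a live migration).
    assert pinned_post_migration == pinned_post_reboot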


@@ -487,7 +487,7 @@ class SRIOVMigration(SRIOVBase):
         )
 
         # Live migrate the server
-        self.live_migrate(server['id'], hostname2, 'ACTIVE')
+        self.live_migrate(server['id'], 'ACTIVE', target_host=hostname2)
 
         # Search the instance's XML for the SR-IOV network device element based
         # on the mac address and binding:vnic_type from port info
@@ -514,7 +514,7 @@
                          'is %s' % pci_allocated_count)
 
         # Migrate server back to the original host
-        self.live_migrate(server['id'], hostname1, 'ACTIVE')
+        self.live_migrate(server['id'], 'ACTIVE', target_host=hostname1)
 
         # Again find the instance's network device element based on the mac
         # address and binding:vnic_type from the port info provided by ports

@@ -61,6 +61,5 @@ class SelinuxLabelsTest(base.BaseWhiteboxComputeTest):
     def test_live_migrate_with_label_check(self):
         server = self.create_test_server()
         self._assert_svirt_labels(server)
-        destination_host = self.get_host_other_than(server['id'])
-        self.live_migrate(server['id'], destination_host, 'ACTIVE')
+        self.live_migrate(server['id'], 'ACTIVE')
         self._assert_svirt_labels(server)