From 55fbe424479d263b86ce9e0b2b73354bdf8b2f99 Mon Sep 17 00:00:00 2001 From: James Parker Date: Tue, 26 Oct 2021 16:59:51 -0400 Subject: [PATCH] Remove NSM from NUMALiveMigrationTest Remove NovaServiceManager from NUMALiveMigrationTest and instead use dedicated_cpus_per_numa and shared_cpus_per_numa from whitebox-hardware when applicable. Also consolidate test_cpu_pinning and test_emulator_threads into a single testcase. Change-Id: I9923dfdc1887a76a3205f481a6119cda57d31c63 --- .zuul.yaml | 4 +- .../api/compute/test_cpu_pinning.py | 264 +++++++----------- 2 files changed, 98 insertions(+), 170 deletions(-) diff --git a/.zuul.yaml b/.zuul.yaml index 518c15dc..e750f306 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -86,7 +86,7 @@ vars: # NOTE(artom) We can't have this on the parent job, otherwise the two # -cpupinnig jobs will inherit it as well. - tempest_exclude_regex: 'test_live_migrate_and_reboot|test_shared_pinned_and_unpinned_guest|^whitebox_tempest_plugin.api.compute.test_cpu_pinning.EmulatorThreadTest' + tempest_exclude_regex: 'test_live_migrate_and_reboot|test_shared_pinned_and_unpinned_guest|^whitebox_tempest_plugin.api.compute.test_cpu_pinning.EmulatorThreadTest|test_cpu_pinning_and_emulator_threads' - job: name: whitebox-devstack-multinode-cpupinning @@ -95,7 +95,7 @@ Runs the CPU pinning tests on single-NUMA, non-SMT, nested virt VMs. Uses [compute]cpu_dedicated_set to configure host CPUs for pinning. 
vars: - tempest_test_regex: 'test_live_migrate_and_reboot|test_shared_pinned_and_unpinned_guest|^whitebox_tempest_plugin.api.compute.test_cpu_pinning.EmulatorThreadTest' + tempest_test_regex: 'test_live_migrate_and_reboot|test_shared_pinned_and_unpinned_guest|^whitebox_tempest_plugin.api.compute.test_cpu_pinning.EmulatorThreadTest|test_cpu_pinning_and_emulator_threads' devstack_local_conf: post-config: $NOVA_CONF: diff --git a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py index 9dec7326..b48af140 100644 --- a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py +++ b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py @@ -34,7 +34,6 @@ from tempest.common import utils from tempest.common import waiters from tempest import config from tempest.exceptions import BuildErrorException -from tempest.lib import decorators from whitebox_tempest_plugin.api.compute import base from whitebox_tempest_plugin.api.compute import numa_helper @@ -72,6 +71,8 @@ class BasePinningTest(base.BaseWhiteboxComputeTest, return cell_pins + # TODO(jparker): Need to clean up this method and similar helper methods. + # This should either end up in numa_helper or the base compute test class def get_server_emulator_threads(self, server_id): """Get the host CPU numbers to which the server's emulator threads are pinned. 
@@ -430,7 +431,6 @@ class EmulatorThreadTest(BasePinningTest, numa_helper.NUMAHelperMixin): self.get_server_emulator_threads(server['id']) # Confirm the emulator threads from the server is equal to the host's - # cpu_shared_set self.assertEqual( cpu_shared_set, emulator_threads, 'Emulator threads for server %s is not the same as CPU set ' @@ -643,184 +643,112 @@ class NUMALiveMigrationTest(NUMALiveMigrationBase): if not compute.is_scheduler_filter_enabled('DifferentHostFilter'): raise cls.skipException('DifferentHostFilter required.') - @decorators.skip_because(bug='2007395', bug_type='storyboard') - def test_cpu_pinning(self): - host1, host2 = self.list_compute_hosts() - ctlplane1, ctlplane2 = [whitebox_utils.get_ctlplane_address(host) for - host in [host1, host2]] + def test_cpu_pinning_and_emulator_threads(self): + dedicated_cpus_per_numa = \ + CONF.whitebox_hardware.dedicated_cpus_per_numa + if dedicated_cpus_per_numa < 2: + msg = ('Need at least 2 or more pCPUs per NUMA allocated to the ' + 'cpu_dedicated_set of the compute host') + raise self.skipException(msg) - numaclient_1 = clients.NUMAClient(ctlplane1) - numaclient_2 = clients.NUMAClient(ctlplane2) + shared_cpus_per_numa = \ + CONF.whitebox_hardware.shared_cpus_per_numa + if shared_cpus_per_numa == 0: + raise self.skipException( + 'Need at least 1 or more pCPUs per NUMA allocated to the ' + 'cpu_shared_set of the compute host') - # Get hosts's topology - topo_1 = numaclient_1.get_host_topology() - topo_2 = numaclient_2.get_host_topology() + # Boot 2 servers such that their vCPUs "fill" a NUMA node. + specs = {'hw:cpu_policy': 'dedicated', + 'hw:emulator_threads_policy': 'share'} + flavor = self.create_flavor(vcpus=(int(dedicated_cpus_per_numa / 2)), + extra_specs=specs) + server_a = self.create_test_server(flavor=flavor['id']) + # TODO(artom) As of 2.68 we can no longer force a live-migration, + # and having the different_host hint in the RequestSpec will + # prevent live migration. 
Start enabling/disabling + # DifferentHostFilter as needed? + server_b = self.create_test_server( + flavor=flavor['id'], + scheduler_hints={'different_host': server_a['id']}) - # Need at least 2 NUMA nodes per host - if len(topo_1) < 2 or len(topo_2) < 2: - raise self.skipException('At least 2 NUMA nodes per host required') + # Iterate over both guests and confirm their pinned vCPUs and emulator + # threads are correct + for server in [server_a, server_b]: + # Determine the compute host of the guest + host = self.get_host_for_server(server['id']) + host_sm = clients.NovaServiceManager(host, 'nova-compute', + self.os_admin.services_client) - # All NUMA nodes need to have same number of CPUs - cpus_per_node = self._get_cpus_per_node(topo_1.values(), - topo_2.values()) - if len(cpus_per_node) != 1: - raise self.skipException('NUMA nodes must have same number of ' - 'CPUs') + # Gather the cpu_dedicated_set and cpu_shared_set configured for + # the compute host + cpu_dedicated_set = host_sm.get_cpu_dedicated_set() + cpu_shared_set = host_sm.get_cpu_shared_set() - # Set both hosts's vcpu_pin_set to the CPUs in the first NUMA node to - # force instances to land there - host1_sm = clients.NovaServiceManager(host1, 'nova-compute', - self.os_admin.services_client) - host2_sm = clients.NovaServiceManager(host2, 'nova-compute', - self.os_admin.services_client) - with whitebox_utils.multicontext( - host1_sm.config_options(('DEFAULT', 'vcpu_pin_set', - hardware.format_cpu_spec(topo_1[0]))), - host2_sm.config_options(('DEFAULT', 'vcpu_pin_set', - hardware.format_cpu_spec(topo_2[0]))) - ): - # Boot 2 servers such that their vCPUs "fill" a NUMA node. - specs = {'hw:cpu_policy': 'dedicated'} - flavor = self.create_flavor(vcpus=cpus_per_node.pop(), - extra_specs=specs) - server_a = self.create_test_server(flavor=flavor['id']) - # TODO(artom) As of 2.68 we can no longer force a live-migration, - # and having the different_host hint in the RequestSpec will - # prevent live migration. 
Start enabling/disabling - # DifferentHostFilter as needed? - server_b = self.create_test_server( - flavor=flavor['id'], - scheduler_hints={'different_host': server_a['id']}) + # Check the nova cells DB and gather the pCPU mapping for the + # guest. Confirm the pCPUs allocated to the guest as documented in + # the DB are a subset of the cpu_dedicated_set configured on the + # host + db_topo = self._get_db_numa_topology(server['id']) + pcpus = self._get_pcpus_from_cpu_pins( + self._get_cpu_pins_from_db_topology(db_topo)) + self.assertTrue(pcpus.issubset(cpu_dedicated_set)) - # At this point we expect CPU pinning in the database to be - # identical for both servers - db_topo_a = self._get_db_numa_topology(server_a['id']) - db_pins_a = self._get_cpu_pins_from_db_topology(db_topo_a) - db_topo_b = self._get_db_numa_topology(server_b['id']) - db_pins_b = self._get_cpu_pins_from_db_topology(db_topo_b) - self.assertEqual(db_pins_a, db_pins_b, - 'Expected servers to have identical CPU pins, ' - 'instead have %s and %s' % (db_pins_a, - db_pins_b)) - - # They should have identical (non-null) CPU pins - pin_a = self.get_pinning_as_set(server_a['id']) - pin_b = self.get_pinning_as_set(server_b['id']) - self.assertTrue(pin_a and pin_b, - 'Pinned servers are actually unpinned: ' - '%s, %s' % (pin_a, pin_b)) + # Gather the emulator threads configured on the guest. Verify the + # emulator threads on the guest are a subset of the cpu_shared_set + # configured on the compute host. 
+            emulator_threads = self.get_server_emulator_threads(server['id']) self.assertEqual( - pin_a, pin_b, - 'Pins should be identical: %s, %s' % (pin_a, pin_b)) + cpu_shared_set, emulator_threads, + 'Emulator threads for server %s is not the same as CPU set ' + '%s' % (emulator_threads, cpu_shared_set)) - # Live migrate server_b to server_a's compute, adding the second - # NUMA node's CPUs to vcpu_pin_set - host_a = self.get_host_other_than(server_b['id']) - host_a_addr = whitebox_utils.get_ctlplane_address(host_a) - host_a_sm = clients.NovaServiceManager( - host_a, 'nova-compute', self.os_admin.services_client) - numaclient_a = clients.NUMAClient(host_a_addr) - topo_a = numaclient_a.get_host_topology() - with host_a_sm.config_options( - ('DEFAULT', 'vcpu_pin_set', - hardware.format_cpu_spec(topo_a[0] + topo_a[1])) - ): - self.live_migrate(server_b['id'], 'ACTIVE', target_host=host_a) + # Gather the cpu pin set for the guest and confirm it is a subset + # of its respective compute host + guest_pin_set = self.get_pinning_as_set(server['id']) + self.assertTrue( + guest_pin_set.issubset(cpu_dedicated_set), + 'Server %s\'s cpu dedicated set is not a subset of the ' + 'compute host\'s cpu dedicated set %s' % ( + guest_pin_set, cpu_dedicated_set)) - # They should have disjoint (non-null) CPU pins in their XML - pin_a = self.get_pinning_as_set(server_a['id']) - pin_b = self.get_pinning_as_set(server_b['id']) - self.assertTrue(pin_a and pin_b, - 'Pinned servers are actually unpinned: ' - '%s, %s' % (pin_a, pin_b)) - self.assertTrue(pin_a.isdisjoint(pin_b), - 'Pins overlap: %s, %s' % (pin_a, pin_b)) + # Migrate server B to the same compute host as server A + host_a = self.get_host_for_server(server_a['id']) + self.live_migrate(server_b['id'], 'ACTIVE', target_host=host_a) - # Same for their topologies in the database - db_topo_a = self._get_db_numa_topology(server_a['id']) - pcpus_a = self._get_pcpus_from_cpu_pins( - self._get_cpu_pins_from_db_topology(db_topo_a)) - 
db_topo_b = self._get_db_numa_topology(server_b['id']) - pcpus_b = self._get_pcpus_from_cpu_pins( - self._get_cpu_pins_from_db_topology(db_topo_b)) - self.assertTrue(pcpus_a and pcpus_b) - self.assertTrue( - pcpus_a.isdisjoint(pcpus_b), - 'Expected servers to have disjoint CPU pins in the ' - 'database, instead have %s and %s' % (pcpus_a, pcpus_b)) + # After migration, guests should have disjoint (non-null) CPU pins in + # their XML + pin_a = self.get_pinning_as_set(server_a['id']) + pin_b = self.get_pinning_as_set(server_b['id']) + self.assertTrue(pin_a and pin_b, + 'Pinned servers are actually unpinned: ' + '%s, %s' % (pin_a, pin_b)) + self.assertTrue(pin_a.isdisjoint(pin_b), + 'Pins overlap: %s, %s' % (pin_a, pin_b)) - # NOTE(artom) At this point we have to manually delete both - # servers before the config_options() context manager reverts - # any config changes it made. This is Nova bug 1836945. - self.delete_server(server_a['id']) - self.delete_server(server_b['id']) + # Same for their topologies in the database + db_topo_a = self._get_db_numa_topology(server_a['id']) + pcpus_a = self._get_pcpus_from_cpu_pins( + self._get_cpu_pins_from_db_topology(db_topo_a)) + db_topo_b = self._get_db_numa_topology(server_b['id']) + pcpus_b = self._get_pcpus_from_cpu_pins( + self._get_cpu_pins_from_db_topology(db_topo_b)) + self.assertTrue(pcpus_a and pcpus_b) + self.assertTrue( + pcpus_a.isdisjoint(pcpus_b), + 'Expected servers to have disjoint CPU pins in the ' + 'database, instead have %s and %s' % (pcpus_a, pcpus_b)) - def test_emulator_threads(self): - # Need 4 CPUs on each host - host1, host2 = self.list_compute_hosts() - ctlplane1, ctlplane2 = [whitebox_utils.get_ctlplane_address(host) for - host in [host1, host2]] - - for host in [ctlplane1, ctlplane2]: - numaclient = clients.NUMAClient(host) - num_cpus = numaclient.get_num_cpus() - if num_cpus < 4: - raise self.skipException('%s has %d CPUs, need 4', - host, - num_cpus) - - host1_sm = 
clients.NovaServiceManager(host1, 'nova-compute', - self.os_admin.services_client) - host2_sm = clients.NovaServiceManager(host2, 'nova-compute', - self.os_admin.services_client) - with whitebox_utils.multicontext( - host1_sm.config_options(('DEFAULT', 'vcpu_pin_set', '0,1'), - ('compute', 'cpu_shared_set', '2')), - host2_sm.config_options(('DEFAULT', 'vcpu_pin_set', '0,1'), - ('compute', 'cpu_shared_set', '3')) - ): - # Boot two servers - specs = {'hw:cpu_policy': 'dedicated', - 'hw:emulator_threads_policy': 'share'} - flavor = self.create_flavor(vcpus=1, extra_specs=specs) - server_a = self.create_test_server(flavor=flavor['id']) - server_b = self.create_test_server( - flavor=flavor['id'], - scheduler_hints={'different_host': server_a['id']}) - - # They should have different (non-null) emulator pins - threads_a = self.get_server_emulator_threads(server_a['id']) - threads_b = self.get_server_emulator_threads(server_b['id']) - self.assertTrue(threads_a and threads_b, - 'Emulator threads should be pinned, are unpinned: ' - '%s, %s' % (threads_a, threads_b)) - self.assertTrue(threads_a.isdisjoint(threads_b)) - - # Live migrate server_b - compute_a = self.get_host_other_than(server_b['id']) - self.live_migrate(server_b['id'], 'ACTIVE', target_host=compute_a) - - # They should have identical (non-null) emulator pins and disjoint - # (non-null) CPU pins - threads_a = self.get_server_emulator_threads(server_a['id']) - threads_b = self.get_server_emulator_threads(server_b['id']) - self.assertTrue(threads_a and threads_b, - 'Emulator threads should be pinned, are unpinned: ' - '%s, %s' % (threads_a, threads_b)) - self.assertEqual(threads_a, threads_b) - pin_a = self.get_pinning_as_set(server_a['id']) - pin_b = self.get_pinning_as_set(server_b['id']) - self.assertTrue(pin_a and pin_b, - 'Pinned servers are actually unpinned: ' - '%s, %s' % (pin_a, pin_b)) - self.assertTrue(pin_a.isdisjoint(pin_b), - 'Pins overlap: %s, %s' % (pin_a, pin_b)) - - # NOTE(artom) At this point we 
have to manually delete both - # servers before the config_options() context manager reverts - # any config changes it made. This is Nova bug 1836945. - self.delete_server(server_a['id']) - self.delete_server(server_b['id']) + # Guests emulator threads should still be configured for both guests. + # Since they are on the same compute host the guest's emulator threads + # should be the same. + threads_a = self.get_server_emulator_threads(server_a['id']) + threads_b = self.get_server_emulator_threads(server_b['id']) + self.assertTrue(threads_a and threads_b, + 'Emulator threads should be pinned, are unpinned: ' + '%s, %s' % (threads_a, threads_b)) + self.assertEqual(threads_a, threads_b, 'After live migration emulator ' + 'threads for both servers should be the same') def test_hugepages(self): host_a, host_b = [whitebox_utils.get_ctlplane_address(host) for host in