Remove NSM from NUMALiveMigrationTest

Remove NovaServiceManager from NUMALiveMigrationTest and instead use
dedicated_cpus_per_numa and shared_cpus_per_numa from whitebox-hardware
when applicable. Also consolidate test_cpu_pinning and
test_emulator_threads into a single testcase.

Change-Id: I9923dfdc1887a76a3205f481a6119cda57d31c63
This commit is contained in:
James Parker 2021-10-26 16:59:51 -04:00
parent 7385e1bc55
commit 55fbe42447
2 changed files with 98 additions and 170 deletions

View File

@ -86,7 +86,7 @@
vars: vars:
# NOTE(artom) We can't have this on the parent job, otherwise the two # NOTE(artom) We can't have this on the parent job, otherwise the two
# -cpupinning jobs will inherit it as well. # -cpupinning jobs will inherit it as well.
tempest_exclude_regex: 'test_live_migrate_and_reboot|test_shared_pinned_and_unpinned_guest|^whitebox_tempest_plugin.api.compute.test_cpu_pinning.EmulatorThreadTest' tempest_exclude_regex: 'test_live_migrate_and_reboot|test_shared_pinned_and_unpinned_guest|^whitebox_tempest_plugin.api.compute.test_cpu_pinning.EmulatorThreadTest|test_cpu_pinning_and_emulator_threads'
- job: - job:
name: whitebox-devstack-multinode-cpupinning name: whitebox-devstack-multinode-cpupinning
@ -95,7 +95,7 @@
Runs the CPU pinning tests on single-NUMA, non-SMT, nested virt VMs. Uses Runs the CPU pinning tests on single-NUMA, non-SMT, nested virt VMs. Uses
[compute]cpu_dedicated_set to configure host CPUs for pinning. [compute]cpu_dedicated_set to configure host CPUs for pinning.
vars: vars:
tempest_test_regex: 'test_live_migrate_and_reboot|test_shared_pinned_and_unpinned_guest|^whitebox_tempest_plugin.api.compute.test_cpu_pinning.EmulatorThreadTest' tempest_test_regex: 'test_live_migrate_and_reboot|test_shared_pinned_and_unpinned_guest|^whitebox_tempest_plugin.api.compute.test_cpu_pinning.EmulatorThreadTest|test_cpu_pinning_and_emulator_threads'
devstack_local_conf: devstack_local_conf:
post-config: post-config:
$NOVA_CONF: $NOVA_CONF:

View File

@ -34,7 +34,6 @@ from tempest.common import utils
from tempest.common import waiters from tempest.common import waiters
from tempest import config from tempest import config
from tempest.exceptions import BuildErrorException from tempest.exceptions import BuildErrorException
from tempest.lib import decorators
from whitebox_tempest_plugin.api.compute import base from whitebox_tempest_plugin.api.compute import base
from whitebox_tempest_plugin.api.compute import numa_helper from whitebox_tempest_plugin.api.compute import numa_helper
@ -72,6 +71,8 @@ class BasePinningTest(base.BaseWhiteboxComputeTest,
return cell_pins return cell_pins
# TODO(jparker): Need to clean up this method and similar helper methods.
# This should either end up in numa_helper or the base compute test class
def get_server_emulator_threads(self, server_id): def get_server_emulator_threads(self, server_id):
"""Get the host CPU numbers to which the server's emulator threads are """Get the host CPU numbers to which the server's emulator threads are
pinned. pinned.
@ -430,7 +431,6 @@ class EmulatorThreadTest(BasePinningTest, numa_helper.NUMAHelperMixin):
self.get_server_emulator_threads(server['id']) self.get_server_emulator_threads(server['id'])
# Confirm the emulator threads from the server is equal to the host's # Confirm the emulator threads from the server is equal to the host's
# cpu_shared_set
self.assertEqual( self.assertEqual(
cpu_shared_set, emulator_threads, cpu_shared_set, emulator_threads,
'Emulator threads for server %s is not the same as CPU set ' 'Emulator threads for server %s is not the same as CPU set '
@ -643,184 +643,112 @@ class NUMALiveMigrationTest(NUMALiveMigrationBase):
if not compute.is_scheduler_filter_enabled('DifferentHostFilter'): if not compute.is_scheduler_filter_enabled('DifferentHostFilter'):
raise cls.skipException('DifferentHostFilter required.') raise cls.skipException('DifferentHostFilter required.')
@decorators.skip_because(bug='2007395', bug_type='storyboard') def test_cpu_pinning_and_emulator_threads(self):
def test_cpu_pinning(self): dedicated_cpus_per_numa = \
host1, host2 = self.list_compute_hosts() CONF.whitebox_hardware.dedicated_cpus_per_numa
ctlplane1, ctlplane2 = [whitebox_utils.get_ctlplane_address(host) for if dedicated_cpus_per_numa < 2:
host in [host1, host2]] msg = ('Need at least 2 or more pCPUs per NUMA allocated to the '
'cpu_dedicated_set of the compute host')
raise self.skipException(msg)
numaclient_1 = clients.NUMAClient(ctlplane1) shared_cpus_per_numa = \
numaclient_2 = clients.NUMAClient(ctlplane2) CONF.whitebox_hardware.shared_cpus_per_numa
if shared_cpus_per_numa == 0:
raise self.skipException(
'Need at least 1 or more pCPUs per NUMA allocated to the '
'cpu_shared_set of the compute host')
# Get hosts's topology # Boot 2 servers such that their vCPUs "fill" a NUMA node.
topo_1 = numaclient_1.get_host_topology() specs = {'hw:cpu_policy': 'dedicated',
topo_2 = numaclient_2.get_host_topology() 'hw:emulator_threads_policy': 'share'}
flavor = self.create_flavor(vcpus=(int(dedicated_cpus_per_numa / 2)),
extra_specs=specs)
server_a = self.create_test_server(flavor=flavor['id'])
# TODO(artom) As of 2.68 we can no longer force a live-migration,
# and having the different_host hint in the RequestSpec will
# prevent live migration. Start enabling/disabling
# DifferentHostFilter as needed?
server_b = self.create_test_server(
flavor=flavor['id'],
scheduler_hints={'different_host': server_a['id']})
# Need at least 2 NUMA nodes per host # Iterate over both guests and confirm their pinned vCPUs and emulator
if len(topo_1) < 2 or len(topo_2) < 2: # threads are correct
raise self.skipException('At least 2 NUMA nodes per host required') for server in [server_a, server_b]:
# Determine the compute host of the guest
host = self.get_host_for_server(server['id'])
host_sm = clients.NovaServiceManager(host, 'nova-compute',
self.os_admin.services_client)
# All NUMA nodes need to have same number of CPUs # Gather the cpu_dedicated_set and cpu_shared_set configured for
cpus_per_node = self._get_cpus_per_node(topo_1.values(), # the compute host
topo_2.values()) cpu_dedicated_set = host_sm.get_cpu_dedicated_set()
if len(cpus_per_node) != 1: cpu_shared_set = host_sm.get_cpu_shared_set()
raise self.skipException('NUMA nodes must have same number of '
'CPUs')
# Set both hosts's vcpu_pin_set to the CPUs in the first NUMA node to # Check the nova cells DB and gather the pCPU mapping for the
# force instances to land there # guest. Confirm the pCPUs allocated to the guest as documented in
host1_sm = clients.NovaServiceManager(host1, 'nova-compute', # the DB are a subset of the cpu_dedicated_set configured on the
self.os_admin.services_client) # host
host2_sm = clients.NovaServiceManager(host2, 'nova-compute', db_topo = self._get_db_numa_topology(server['id'])
self.os_admin.services_client) pcpus = self._get_pcpus_from_cpu_pins(
with whitebox_utils.multicontext( self._get_cpu_pins_from_db_topology(db_topo))
host1_sm.config_options(('DEFAULT', 'vcpu_pin_set', self.assertTrue(pcpus.issubset(cpu_dedicated_set))
hardware.format_cpu_spec(topo_1[0]))),
host2_sm.config_options(('DEFAULT', 'vcpu_pin_set',
hardware.format_cpu_spec(topo_2[0])))
):
# Boot 2 servers such that their vCPUs "fill" a NUMA node.
specs = {'hw:cpu_policy': 'dedicated'}
flavor = self.create_flavor(vcpus=cpus_per_node.pop(),
extra_specs=specs)
server_a = self.create_test_server(flavor=flavor['id'])
# TODO(artom) As of 2.68 we can no longer force a live-migration,
# and having the different_host hint in the RequestSpec will
# prevent live migration. Start enabling/disabling
# DifferentHostFilter as needed?
server_b = self.create_test_server(
flavor=flavor['id'],
scheduler_hints={'different_host': server_a['id']})
# At this point we expect CPU pinning in the database to be # Gather the emulator threads configured on the guest. Verify the
# identical for both servers # emulator threads on the guest are a subset of the cpu_shared_set
db_topo_a = self._get_db_numa_topology(server_a['id']) # configured on the compute host.
db_pins_a = self._get_cpu_pins_from_db_topology(db_topo_a) emulator_threads = self.get_server_emulator_threads(server['id'])
db_topo_b = self._get_db_numa_topology(server_b['id'])
db_pins_b = self._get_cpu_pins_from_db_topology(db_topo_b)
self.assertEqual(db_pins_a, db_pins_b,
'Expected servers to have identical CPU pins, '
'instead have %s and %s' % (db_pins_a,
db_pins_b))
# They should have identical (non-null) CPU pins
pin_a = self.get_pinning_as_set(server_a['id'])
pin_b = self.get_pinning_as_set(server_b['id'])
self.assertTrue(pin_a and pin_b,
'Pinned servers are actually unpinned: '
'%s, %s' % (pin_a, pin_b))
self.assertEqual( self.assertEqual(
pin_a, pin_b, cpu_shared_set, emulator_threads,
'Pins should be identical: %s, %s' % (pin_a, pin_b)) 'Emulator threads for server %s is not the same as CPU set '
'%s' % (emulator_threads, cpu_shared_set))
# Live migrate server_b to server_a's compute, adding the second # Gather the cpu pin set for the guest and confirm it is a subset
# NUMA node's CPUs to vcpu_pin_set # of its respective compute host
host_a = self.get_host_other_than(server_b['id']) guest_pin_set = self.get_pinning_as_set(server['id'])
host_a_addr = whitebox_utils.get_ctlplane_address(host_a) self.assertTrue(
host_a_sm = clients.NovaServiceManager( guest_pin_set.issubset(cpu_dedicated_set),
host_a, 'nova-compute', self.os_admin.services_client) 'Server %s\'s cpu dedicated set is not a subset of the '
numaclient_a = clients.NUMAClient(host_a_addr) 'compute host\'s cpu dedicated set %s' % (
topo_a = numaclient_a.get_host_topology() guest_pin_set, cpu_dedicated_set))
with host_a_sm.config_options(
('DEFAULT', 'vcpu_pin_set',
hardware.format_cpu_spec(topo_a[0] + topo_a[1]))
):
self.live_migrate(server_b['id'], 'ACTIVE', target_host=host_a)
# They should have disjoint (non-null) CPU pins in their XML # Migrate server B to the same compute host as server A
pin_a = self.get_pinning_as_set(server_a['id']) host_a = self.get_host_for_server(server_a['id'])
pin_b = self.get_pinning_as_set(server_b['id']) self.live_migrate(server_b['id'], 'ACTIVE', target_host=host_a)
self.assertTrue(pin_a and pin_b,
'Pinned servers are actually unpinned: '
'%s, %s' % (pin_a, pin_b))
self.assertTrue(pin_a.isdisjoint(pin_b),
'Pins overlap: %s, %s' % (pin_a, pin_b))
# Same for their topologies in the database # After migration, guests should have disjoint (non-null) CPU pins in
db_topo_a = self._get_db_numa_topology(server_a['id']) # their XML
pcpus_a = self._get_pcpus_from_cpu_pins( pin_a = self.get_pinning_as_set(server_a['id'])
self._get_cpu_pins_from_db_topology(db_topo_a)) pin_b = self.get_pinning_as_set(server_b['id'])
db_topo_b = self._get_db_numa_topology(server_b['id']) self.assertTrue(pin_a and pin_b,
pcpus_b = self._get_pcpus_from_cpu_pins( 'Pinned servers are actually unpinned: '
self._get_cpu_pins_from_db_topology(db_topo_b)) '%s, %s' % (pin_a, pin_b))
self.assertTrue(pcpus_a and pcpus_b) self.assertTrue(pin_a.isdisjoint(pin_b),
self.assertTrue( 'Pins overlap: %s, %s' % (pin_a, pin_b))
pcpus_a.isdisjoint(pcpus_b),
'Expected servers to have disjoint CPU pins in the '
'database, instead have %s and %s' % (pcpus_a, pcpus_b))
# NOTE(artom) At this point we have to manually delete both # Same for their topologies in the database
# servers before the config_options() context manager reverts db_topo_a = self._get_db_numa_topology(server_a['id'])
# any config changes it made. This is Nova bug 1836945. pcpus_a = self._get_pcpus_from_cpu_pins(
self.delete_server(server_a['id']) self._get_cpu_pins_from_db_topology(db_topo_a))
self.delete_server(server_b['id']) db_topo_b = self._get_db_numa_topology(server_b['id'])
pcpus_b = self._get_pcpus_from_cpu_pins(
self._get_cpu_pins_from_db_topology(db_topo_b))
self.assertTrue(pcpus_a and pcpus_b)
self.assertTrue(
pcpus_a.isdisjoint(pcpus_b),
'Expected servers to have disjoint CPU pins in the '
'database, instead have %s and %s' % (pcpus_a, pcpus_b))
def test_emulator_threads(self): # Guests emulator threads should still be configured for both guests.
# Need 4 CPUs on each host # Since they are on the same compute host the guest's emulator threads
host1, host2 = self.list_compute_hosts() # should be the same.
ctlplane1, ctlplane2 = [whitebox_utils.get_ctlplane_address(host) for threads_a = self.get_server_emulator_threads(server_a['id'])
host in [host1, host2]] threads_b = self.get_server_emulator_threads(server_b['id'])
self.assertTrue(threads_a and threads_b,
for host in [ctlplane1, ctlplane2]: 'Emulator threads should be pinned, are unpinned: '
numaclient = clients.NUMAClient(host) '%s, %s' % (threads_a, threads_b))
num_cpus = numaclient.get_num_cpus() self.assertEqual(threads_a, threads_b, 'After live migration emulator '
if num_cpus < 4: 'threads for both servers should be the same')
raise self.skipException('%s has %d CPUs, need 4',
host,
num_cpus)
host1_sm = clients.NovaServiceManager(host1, 'nova-compute',
self.os_admin.services_client)
host2_sm = clients.NovaServiceManager(host2, 'nova-compute',
self.os_admin.services_client)
with whitebox_utils.multicontext(
host1_sm.config_options(('DEFAULT', 'vcpu_pin_set', '0,1'),
('compute', 'cpu_shared_set', '2')),
host2_sm.config_options(('DEFAULT', 'vcpu_pin_set', '0,1'),
('compute', 'cpu_shared_set', '3'))
):
# Boot two servers
specs = {'hw:cpu_policy': 'dedicated',
'hw:emulator_threads_policy': 'share'}
flavor = self.create_flavor(vcpus=1, extra_specs=specs)
server_a = self.create_test_server(flavor=flavor['id'])
server_b = self.create_test_server(
flavor=flavor['id'],
scheduler_hints={'different_host': server_a['id']})
# They should have different (non-null) emulator pins
threads_a = self.get_server_emulator_threads(server_a['id'])
threads_b = self.get_server_emulator_threads(server_b['id'])
self.assertTrue(threads_a and threads_b,
'Emulator threads should be pinned, are unpinned: '
'%s, %s' % (threads_a, threads_b))
self.assertTrue(threads_a.isdisjoint(threads_b))
# Live migrate server_b
compute_a = self.get_host_other_than(server_b['id'])
self.live_migrate(server_b['id'], 'ACTIVE', target_host=compute_a)
# They should have identical (non-null) emulator pins and disjoint
# (non-null) CPU pins
threads_a = self.get_server_emulator_threads(server_a['id'])
threads_b = self.get_server_emulator_threads(server_b['id'])
self.assertTrue(threads_a and threads_b,
'Emulator threads should be pinned, are unpinned: '
'%s, %s' % (threads_a, threads_b))
self.assertEqual(threads_a, threads_b)
pin_a = self.get_pinning_as_set(server_a['id'])
pin_b = self.get_pinning_as_set(server_b['id'])
self.assertTrue(pin_a and pin_b,
'Pinned servers are actually unpinned: '
'%s, %s' % (pin_a, pin_b))
self.assertTrue(pin_a.isdisjoint(pin_b),
'Pins overlap: %s, %s' % (pin_a, pin_b))
# NOTE(artom) At this point we have to manually delete both
# servers before the config_options() context manager reverts
# any config changes it made. This is Nova bug 1836945.
self.delete_server(server_a['id'])
self.delete_server(server_b['id'])
def test_hugepages(self): def test_hugepages(self):
host_a, host_b = [whitebox_utils.get_ctlplane_address(host) for host in host_a, host_b = [whitebox_utils.get_ctlplane_address(host) for host in