Start transition to declarative CPU pinning configs

Add two new jobs: one runs the CPU pinning tests with
[compute]cpu_dedicated_set, the other with the legacy
[DEFAULT]vcpu_pin_set. Start by converting the live migrate and
reboot test to the new way of doing CPU pinning.

Because not all tests are converted at once (the unconverted ones
still change the host CPU pinning configs on the fly), the new jobs
can only run the converted tests.

In the future, once all tests have been converted in subsequent
patches, we will merge the default job and the -cpupinning job,
leaving only the -cpupinninglegacy job to run the CPU pinning tests
that need [DEFAULT]vcpu_pin_set.

Because we no longer always need to pass a target host to the
live_migrate() helper, this patch also makes its target_host
parameter optional: when it is omitted, the helper only asserts that
the server has moved off its original host.
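
For illustration, the two call styles the new signature allows (a
minimal sketch; the ExampleTest class is hypothetical, the helpers
are the existing whitebox ones):

    from whitebox_tempest_plugin.api.compute import base


    class ExampleTest(base.BaseWhiteboxComputeTest):
        """Hypothetical test class, for illustration only."""

        def test_untargeted_migration(self):
            server = self.create_test_server()
            # No target_host: the scheduler picks the destination and
            # live_migrate() asserts the server changed hosts.
            self.live_migrate(server['id'], 'ACTIVE')

        def test_targeted_migration(self):
            server = self.create_test_server()
            dest = self.get_host_other_than(server['id'])
            # Explicit target_host: live_migrate() asserts the server
            # landed on that host.
            self.live_migrate(server['id'], 'ACTIVE', target_host=dest)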

Change-Id: If0ef21f885eef880d55aed7ebed99828fe978d38
Artom Lifshitz 2021-08-20 10:14:33 -04:00
parent 7431d4e962
commit cca33388c2
7 changed files with 122 additions and 156 deletions


@@ -29,7 +29,8 @@
       - compute
 
 - job:
-    name: whitebox-devstack-multinode
+    name: whitebox-devstack-multinode-base
+    abstract: true
     parent: tempest-multinode-full-py3
     nodeset: nested-virt-multinode
     description: |
@@ -79,12 +80,72 @@
         tempest:
           num_hugepages: 512
 
+- job:
+    name: whitebox-devstack-multinode
+    parent: whitebox-devstack-multinode-base
+    vars:
+      # NOTE(artom) We can't have this on the parent job, otherwise the two
+      # -cpupinning jobs will inherit it as well.
+      tempest_exclude_regex: test_live_migrate_and_reboot
+
+- job:
+    name: whitebox-devstack-multinode-cpupinning
+    parent: whitebox-devstack-multinode-base
+    description: |
+      Runs the CPU pinning tests on single-NUMA, non-SMT, nested virt VMs. Uses
+      [compute]cpu_dedicated_set to configure host CPUs for pinning.
+    vars:
+      tempest_test_regex: 'test_live_migrate_and_reboot'
+      devstack_local_conf:
+        post-config:
+          $NOVA_CONF:
+            compute:
+              cpu_dedicated_set: '0-3'
+              cpu_shared_set: '4,5'
+    group-vars:
+      subnode:
+        devstack_local_conf:
+          post-config:
+            $NOVA_CONF:
+              compute:
+                cpu_dedicated_set: '4-7'
+                cpu_shared_set: '2,3'
+
+- job:
+    name: whitebox-devstack-multinode-cpupinninglegacy
+    parent: whitebox-devstack-multinode-base
+    description: |
+      Runs the CPU pinning tests on single-NUMA, non-SMT, nested virt VMs. Uses
+      [DEFAULT]vcpu_pin_set to configure host CPUs for pinning.
+    vars:
+      tempest_test_regex: 'test_live_migrate_and_reboot'
+      devstack_local_conf:
+        post-config:
+          $NOVA_CONF:
+            DEFAULT:
+              vcpu_pin_set: '0-3'
+            compute:
+              cpu_shared_set: '4,5'
+    group-vars:
+      subnode:
+        devstack_local_conf:
+          post-config:
+            $NOVA_CONF:
+              DEFAULT:
+                vcpu_pin_set: '4-7'
+              compute:
+                cpu_shared_set: '2,3'
+
 - project:
     templates:
       - openstack-python3-xena-jobs
     check:
       jobs:
         - whitebox-devstack-multinode
+        - whitebox-devstack-multinode-cpupinning
+        - whitebox-devstack-multinode-cpupinninglegacy
     gate:
       jobs:
         - whitebox-devstack-multinode
+        - whitebox-devstack-multinode-cpupinning
+        - whitebox-devstack-multinode-cpupinninglegacy
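
As a quick illustration of how the regex pair above splits the suite
between the jobs (a standalone sketch: the selection logic is a
simplification of tempest's, and the test IDs are shortened):

    import re

    tests = [
        'LiveMigrationBase.test_live_migrate_and_reboot',
        'LiveMigrationBase.test_volume_backed_live_migration',
    ]

    # The -cpupinning jobs select with tempest_test_regex...
    cpupinning = [t for t in tests
                  if re.search('test_live_migrate_and_reboot', t)]
    # ...while the default job excludes with tempest_exclude_regex.
    default = [t for t in tests
               if not re.search('test_live_migrate_and_reboot', t)]

    # Every test runs in exactly one of the two groups.
    assert set(cpupinning) | set(default) == set(tests)
    assert set(cpupinning).isdisjoint(default)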


@@ -126,21 +126,22 @@ class BaseWhiteboxComputeTest(base.BaseV2ComputeAdminTest):
         xml = virshxml.dumpxml(server_instance_name)
         return ET.fromstring(xml)
 
-    def live_migrate(self, server_id, target_host, state):
-        self.admin_servers_client.live_migrate_server(
-            server_id, host=target_host, block_migration='auto')
+    def live_migrate(self, server_id, state, target_host=None):
+        orig_host = self.get_host_for_server(server_id)
+        self.admin_servers_client.live_migrate_server(server_id,
+                                                      block_migration='auto',
+                                                      host=target_host)
         waiters.wait_for_server_status(self.servers_client, server_id, state)
-        migration_list = (self.admin_migration_client.list_migrations()
-                          ['migrations'])
-        msg = ("Live Migration failed. Migrations list for Instance "
-               "%s: [" % server_id)
-        for live_migration in migration_list:
-            if (live_migration['instance_uuid'] == server_id):
-                msg += "\n%s" % live_migration
-        msg += "]"
-        self.assertEqual(target_host, self.get_host_for_server(server_id),
-                         msg)
+        if target_host:
+            self.assertEqual(
+                target_host, self.get_host_for_server(server_id),
+                'Live migration failed, instance %s is not '
+                'on target host %s' % (server_id, target_host))
+        else:
+            self.assertNotEqual(
+                orig_host, self.get_host_for_server(server_id),
+                'Live migration failed, '
+                'instance %s has not changed hosts' % server_id)
 
     # TODO(lyarwood): Refactor all of this into a common module between
     # tempest.api.{compute,volume} and tempest.scenario.manager where this

@@ -925,7 +925,7 @@ class NUMALiveMigrationTest(NUMALiveMigrationBase):
                 ('DEFAULT', 'vcpu_pin_set',
                  hardware.format_cpu_spec(topo_a[0] + topo_a[1]))
         ):
-            self.live_migrate(server_b['id'], host_a, 'ACTIVE')
+            self.live_migrate(server_b['id'], 'ACTIVE', target_host=host_a)
 
         # They should have disjoint (non-null) CPU pins in their XML
         pin_a = self.get_pinning_as_set(server_a['id'])
@@ -998,7 +998,7 @@ class NUMALiveMigrationTest(NUMALiveMigrationBase):
         # Live migrate server_b
         compute_a = self.get_host_other_than(server_b['id'])
-        self.live_migrate(server_b['id'], compute_a, 'ACTIVE')
+        self.live_migrate(server_b['id'], 'ACTIVE', target_host=compute_a)
 
         # They should have identical (non-null) emulator pins and disjoint
         # (non-null) CPU pins
@@ -1114,7 +1114,7 @@ class NUMALiveMigrationTest(NUMALiveMigrationBase):
         # Live migrate server_b
         compute_a = self.get_host_other_than(server_b['id'])
-        self.live_migrate(server_b['id'], compute_a, 'ACTIVE')
+        self.live_migrate(server_b['id'], 'ACTIVE', target_host=compute_a)
 
         # Assert hugepage XML element is still present and correct size for
         # server_b after live migration
@@ -1249,7 +1249,8 @@ class NUMACPUDedicatedLiveMigrationTest(NUMALiveMigrationBase):
         # Live migrate shared server A to the compute node with shared
         # server B. Both servers are using shared vCPUs, so migration
         # should be successful
-        self.live_migrate(shared_server_a['id'], host2, 'ACTIVE')
+        self.live_migrate(shared_server_a['id'], 'ACTIVE',
+                          target_host=host2)
 
         # Validate shared server A now has a shared cpuset that is equal
         # to its new host's cpu_shared_set
@@ -1265,7 +1266,8 @@
         # Live migrate dedicated server A to the same host holding
         # dedicated server B. The end result should be all 4 servers on
         # the same host.
-        self.live_migrate(dedicated_server_a['id'], host2, 'ACTIVE')
+        self.live_migrate(dedicated_server_a['id'], 'ACTIVE',
+                          target_host=host2)
 
         # Dedicated server A should have a CPU pin set that is a subset of
         # its new host's cpu_dedicated_set and should not intersect with

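The hunks above feed CPU lists through hardware.format_cpu_spec() to
build the config values. As a rough illustration of the spec strings
involved, a hypothetical re-implementation (not the plugin's actual
code):

    def format_cpu_spec(cpus):
        # Collapse a collection of CPU IDs into nova's range syntax,
        # e.g. {0, 1, 2, 5} -> '0-2,5'.
        cpus = sorted(set(cpus))
        parts = []
        while cpus:
            start = prev = cpus.pop(0)
            while cpus and cpus[0] == prev + 1:
                prev = cpus.pop(0)
            parts.append(str(start) if start == prev
                         else '%d-%d' % (start, prev))
        return ','.join(parts)

    assert format_cpu_spec([0, 1, 2, 3]) == '0-3'
    assert format_cpu_spec({4, 5, 7}) == '4-5,7'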

@@ -67,8 +67,7 @@ class FileBackedMemory(base.BaseWhiteboxComputeTest):
         ):
             server = self.create_test_server()
             self._assert_shared_mode_and_file_type(server)
-            destination_host = self.get_host_other_than(server['id'])
-            self.live_migrate(server['id'], destination_host, 'ACTIVE')
+            self.live_migrate(server['id'], 'ACTIVE')
             self._assert_shared_mode_and_file_type(server)
 
     def test_live_migrate_non_file_backed_host_to_file_backed_host(self):


@@ -19,22 +19,21 @@ import testtools
 
 from tempest.common import utils
 from tempest import config
 from tempest.lib import decorators
 
 from whitebox_tempest_plugin.api.compute import base
 from whitebox_tempest_plugin.api.compute import numa_helper
-from whitebox_tempest_plugin import hardware
-from whitebox_tempest_plugin.services import clients
-from whitebox_tempest_plugin import utils as whitebox_utils
 
 CONF = config.CONF
 LOG = logging.getLogger(__name__)
 
 
 # NOTE(mdbooth): This test was originally based on
 # tempest.api.compute.admin.test_live_migration
-class LiveMigrationBase(base.BaseWhiteboxComputeTest):
+class LiveMigrationBase(base.BaseWhiteboxComputeTest,
+                        numa_helper.NUMAHelperMixin):
 
     # First support for block_migration='auto': since Mitaka (OSP9)
     min_microversion = '2.25'
@@ -50,15 +49,9 @@ class LiveMigrationBase(base.BaseWhiteboxComputeTest):
             raise cls.skipException(
                 "Less than 2 compute nodes, skipping migration test.")
 
-
-class LiveMigrationTest(LiveMigrationBase):
-    # First support for block_migration='auto': since Mitaka (OSP9)
-    min_microversion = '2.25'
-
     @testtools.skipUnless(CONF.compute_feature_enabled.
                           volume_backed_live_migration,
                           'Volume-backed live migration not available')
     @decorators.idempotent_id('41e92884-ed04-42da-89fc-ef8922646542')
     @utils.services('volume')
     def test_volume_backed_live_migration(self):
         # Live migrate an instance to another host
@@ -73,126 +66,37 @@
         # The initial value of disk cache depends on config and the storage in
         # use. We can't guess it, so fetch it before we start.
         cache_type = root_disk_cache()
-        source_host = self.get_host_for_server(server_id)
-        destination_host = self.get_host_other_than(server_id)
-
-        LOG.info("Live migrate from source %s to destination %s",
-                 source_host, destination_host)
-        self.live_migrate(server_id, destination_host, 'ACTIVE')
+        self.live_migrate(server_id, 'ACTIVE')
 
         # Assert cache-mode has not changed during live migration
         self.assertEqual(cache_type, root_disk_cache())
 
-
-class LiveMigrationAndReboot(LiveMigrationBase, numa_helper.NUMAHelperMixin):
-
-    dedicated_cpu_policy = {'hw:cpu_policy': 'dedicated'}
-
-    @classmethod
-    def skip_checks(cls):
-        super(LiveMigrationAndReboot, cls).skip_checks()
-        if getattr(CONF.whitebox_hardware, 'cpu_topology', None) is None:
-            msg = "cpu_topology in whitebox-hardware is not present"
-            raise cls.skipException(msg)
-
-    def _migrate_and_reboot_instance(self, section, cpu_set_parameter):
-        flavor_vcpu_size = 2
-        cpu_list = hardware.get_all_cpus()
-        if len(cpu_list) < 4:
-            raise self.skipException('Requires 4 or more pCPUs to execute '
-                                     'the test')
-
-        host1, host2 = self.list_compute_hosts()
-
-        # Create two different cpu dedicated ranges for each host in order
-        # to force different domain XML after instance migration
-        host1_dedicated_set = cpu_list[:2]
-        host2_dedicated_set = cpu_list[2:4]
-
-        dedicated_flavor = self.create_flavor(
-            vcpus=flavor_vcpu_size,
-            extra_specs=self.dedicated_cpu_policy
-        )
-
-        host1_sm = clients.NovaServiceManager(host1, 'nova-compute',
-                                              self.os_admin.services_client)
-        host2_sm = clients.NovaServiceManager(host2, 'nova-compute',
-                                              self.os_admin.services_client)
-
-        with whitebox_utils.multicontext(
-            host1_sm.config_options(
-                (section, cpu_set_parameter,
-                 hardware.format_cpu_spec(host1_dedicated_set))),
-            host2_sm.config_options(
-                (section, cpu_set_parameter,
-                 hardware.format_cpu_spec(host2_dedicated_set)))
-        ):
-            # Create a server with a dedicated cpu policy
-            server = self.create_test_server(
-                flavor=dedicated_flavor['id']
-            )
-
-            # Gather the pinned CPUs for the instance prior to migration
-            pinned_cpus_pre_migration = self.get_pinning_as_set(server['id'])
-
-            # Determine the destination migration host and migrate the server
-            # to that host
-            compute_dest = self.get_host_other_than(server['id'])
-            self.live_migrate(server['id'], compute_dest, 'ACTIVE')
-
-            # After successful migration determine the instance's pinned CPUs
-            pinned_cpus_post_migration = self.get_pinning_as_set(server['id'])
-
-            # Confirm the pCPUs are no longer the same as they were when
-            # on the source compute host
-            self.assertTrue(
-                pinned_cpus_post_migration.isdisjoint(
-                    pinned_cpus_pre_migration),
-                "After migration, server %s's current pinned CPUs %s "
-                "should no longer match the pinned CPUs it had pre-"
-                "migration: %s" % (server['id'], pinned_cpus_post_migration,
-                                   pinned_cpus_pre_migration)
-            )
-
-            # Soft reboot the server
-            # TODO(artom) If the soft reboot fails, the libvirt driver will do
-            # a hard reboot. This is only detectable through log parsing, so to
-            # be 100% sure we got the soft reboot we wanted, we should probably
-            # do that.
-            self.reboot_server(server['id'], type='SOFT')
-
-            # Gather the server's pinned CPUs after the soft reboot
-            pinned_cpus_post_reboot = self.get_pinning_as_set(server['id'])
-
-            # Validate the server's pinned CPUs remain the same after the
-            # reboot
-            self.assertTrue(
-                pinned_cpus_post_migration == pinned_cpus_post_reboot,
-                'After soft rebooting server %s its pinned CPUs should have '
-                'remained the same as %s, but are instead now %s' % (
-                    server['id'], pinned_cpus_post_migration,
-                    pinned_cpus_post_reboot)
-            )
-
-            self.delete_server(server['id'])
-
-
-class VCPUPinSetMigrateAndReboot(LiveMigrationAndReboot):
-
-    max_microversion = '2.79'
-    pin_set_mode = 'vcpu_pin_set'
-    pin_section = 'DEFAULT'
-
-    def test_vcpu_pin_migrate_and_reboot(self):
-        self._migrate_and_reboot_instance(self.pin_section, self.pin_set_mode)
-
-
-class CPUDedicatedMigrateAndReboot(LiveMigrationAndReboot):
-
-    min_microversion = '2.79'
-    max_microversion = 'latest'
-    pin_set_mode = 'cpu_dedicated_set'
-    pin_section = 'compute'
-
-    def test_cpu_dedicated_migrate_and_reboot(self):
-        self._migrate_and_reboot_instance(self.pin_section, self.pin_set_mode)
+    def test_live_migrate_and_reboot(self):
+        """Test for bug 1890501. Assumes that [compute]cpu_dedicated_set
+        (or [DEFAULT]vcpu_pin_set in the legacy case) is different on all
+        compute hosts in the deployment.
+        """
+        flavor = self.create_flavor(
+            extra_specs={'hw:cpu_policy': 'dedicated'})
+        server = self.create_test_server(flavor=flavor['id'])
+        pinned_cpus_pre_migration = self.get_pinning_as_set(server['id'])
+        self.live_migrate(server['id'], 'ACTIVE')
+        pinned_cpus_post_migration = self.get_pinning_as_set(server['id'])
+        self.assertTrue(
+            pinned_cpus_post_migration.isdisjoint(pinned_cpus_pre_migration),
+            "After migration, server %s's current pinned CPUs %s should "
+            "no longer match the pinned CPUs it had pre-migration: %s" % (
+                server['id'], pinned_cpus_post_migration,
+                pinned_cpus_pre_migration))
+        # TODO(artom) If the soft reboot fails, the libvirt driver will do
+        # a hard reboot. This is only detectable through log parsing, so to
+        # be 100% sure we got the soft reboot we wanted, we should probably
+        # do that.
+        self.reboot_server(server['id'], type='SOFT')
+        pinned_cpus_post_reboot = self.get_pinning_as_set(server['id'])
+        self.assertTrue(
+            pinned_cpus_post_migration == pinned_cpus_post_reboot,
+            'After soft rebooting server %s its pinned CPUs should have '
+            'remained the same as %s, but are instead now %s' % (
+                server['id'], pinned_cpus_post_migration,
+                pinned_cpus_post_reboot))
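
The invariant the new test checks is purely set-level: the pinning
must change across the migration (the hosts' dedicated sets are
disjoint) and must not change across the reboot. A standalone
illustration with made-up values mirroring the job config above
(first host pins on 0-3, subnode on 4-7):

    pinned_pre_migration = {0, 1}    # pinned from the source host's set
    pinned_post_migration = {4, 5}   # re-pinned from the destination's set
    pinned_post_reboot = {4, 5}      # the reboot must not re-pin

    # Migration moves the instance to a disjoint dedicated set...
    assert pinned_post_migration.isdisjoint(pinned_pre_migration)
    # ...and a soft reboot leaves the pinning untouched (in bug 1890501
    # the pinning changed after a reboot that followed a live migration).
    assert pinned_post_migration == pinned_post_reboot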


@@ -487,7 +487,7 @@ class SRIOVMigration(SRIOVBase):
         )
 
         # Live migrate the server
-        self.live_migrate(server['id'], hostname2, 'ACTIVE')
+        self.live_migrate(server['id'], 'ACTIVE', target_host=hostname2)
 
         # Search the instance's XML for the SR-IOV network device element based
         # on the mac address and binding:vnic_type from port info
@@ -514,7 +514,7 @@
                          'is %s' % pci_allocated_count)
 
         # Migrate server back to the original host
-        self.live_migrate(server['id'], hostname1, 'ACTIVE')
+        self.live_migrate(server['id'], 'ACTIVE', target_host=hostname1)
 
         # Again find the instance's network device element based on the mac
         # address and binding:vnic_type from the port info provided by ports

@@ -61,6 +61,5 @@ class SelinuxLabelsTest(base.BaseWhiteboxComputeTest):
     def test_live_migrate_with_label_check(self):
         server = self.create_test_server()
         self._assert_svirt_labels(server)
-        destination_host = self.get_host_other_than(server['id'])
-        self.live_migrate(server['id'], destination_host, 'ACTIVE')
+        self.live_migrate(server['id'], 'ACTIVE')
         self._assert_svirt_labels(server)