From 31ccbbb4c0ef43e723c1da65130b1e1c07f6b61d Mon Sep 17 00:00:00 2001
From: Amit Uniyal
Date: Wed, 10 Apr 2024 04:57:37 +0000
Subject: [PATCH] evacuation tests

- Adds evacuation tests
- Adds exception MigrationException
- Adds function to run virsh commands on host

Depends-On: https://review.opendev.org/c/openstack/tempest/+/919920
Change-Id: Idc0619afedc79945efe001051e86bd80c9136d1e
---
 whitebox_tempest_plugin/api/compute/base.py    |  23 +++
 .../api/compute/test_server_evacuation.py      | 159 ++++++++++++++++++
 .../api/compute/test_vdpa.py                   |  42 ++++-
 .../api/compute/test_vgpu.py                   |  78 +++++++++
 whitebox_tempest_plugin/common/waiters.py      |  20 +++
 whitebox_tempest_plugin/exceptions.py          |   4 +
 6 files changed, 325 insertions(+), 1 deletion(-)
 create mode 100644 whitebox_tempest_plugin/api/compute/test_server_evacuation.py

diff --git a/whitebox_tempest_plugin/api/compute/base.py b/whitebox_tempest_plugin/api/compute/base.py
index 87e93cf4..553956ef 100644
--- a/whitebox_tempest_plugin/api/compute/base.py
+++ b/whitebox_tempest_plugin/api/compute/base.py
@@ -22,6 +22,8 @@ from tempest.common import waiters
 from tempest import config
 from tempest.lib.common.utils import data_utils
 from tempest.lib.common.utils import test_utils
+from time import sleep
+from whitebox_tempest_plugin.common import waiters as wb_waiters

 from whitebox_tempest_plugin.services import clients

@@ -135,6 +137,21 @@ class BaseWhiteboxComputeTest(base.BaseV2ComputeAdminTest):
         xml = virshxml.dumpxml(server_instance_name)
         return ET.fromstring(xml)

+    def shutdown_server_on_host(self, server_id, host):
+        # Shut the server down directly on its host via virsh; this is
+        # needed when the compute service is stopped and the guest can no
+        # longer be stopped through the API
+        server = self.os_admin.servers_client.show_server(server_id)['server']
+        domain = server['OS-EXT-SRV-ATTR:instance_name']
+
+        ssh_cl = clients.SSHClient(host)
+        cmd = "virsh shutdown %s" % domain
+        ssh_cl.execute(cmd, sudo=True)
+        msg, wait_counter = domain, 0
+        cmd = "virsh list --name"
+        # Poll the list of running domains until ours disappears, waiting
+        # at most six 10-second intervals
+        while domain in msg and wait_counter < 6:
+            sleep(10)
+            msg = ssh_cl.execute(cmd, sudo=True)
+            wait_counter += 1
+
     def get_server_blockdevice_path(self, server_id, device_name):
         host = self.get_host_for_server(server_id)
         virshxml = clients.VirshXMLClient(host)
@@ -448,3 +465,9 @@ class BaseWhiteboxComputeTest(base.BaseV2ComputeAdminTest):
         root = self.get_server_xml(server_id)
         huge_pages = root.findall('.memoryBacking/hugepages/page')
         return huge_pages
+
+    def evacuate_server(self, server_id, **kwargs):
+        """Evacuate the server and wait for the migration to complete."""
+        self.admin_servers_client.evacuate_server(server_id, **kwargs)
+        wb_waiters.wait_for_server_migration_complete(self.os_admin, server_id)
diff --git a/whitebox_tempest_plugin/api/compute/test_server_evacuation.py b/whitebox_tempest_plugin/api/compute/test_server_evacuation.py
new file mode 100644
index 00000000..17aca0bc
--- /dev/null
+++ b/whitebox_tempest_plugin/api/compute/test_server_evacuation.py
@@ -0,0 +1,159 @@
+# Copyright 2024 Red Hat
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from tempest.common import waiters
+from tempest import config
+
+from whitebox_tempest_plugin.api.compute import base
+from whitebox_tempest_plugin.services import clients
+
+
+CONF = config.CONF
+
+
+class ServerEvacuation(base.BaseWhiteboxComputeTest):
+    '''Test server evacuation.
+    '''
+
+    min_microversion = '2.95'
+
+    @classmethod
+    def skip_checks(cls):
+        super(ServerEvacuation, cls).skip_checks()
+        if CONF.compute.min_compute_nodes < 2:
+            msg = "Need two or more compute nodes to execute evacuate"
+            raise cls.skipException(msg)
+
+    def test_evacuate_to_shutoff(self):
+        server_id = self.create_test_server(wait_until="ACTIVE")['id']
+        host_a = self.get_host_for_server(server_id)
+
+        # set the compute service down on host-A
+        host_a_svc = clients.NovaServiceManager(
+            host_a, 'nova-compute', self.os_admin.services_client)
+
+        with host_a_svc.stopped():
+            # as the compute service is down on the source host,
+            # shut down the server using virsh
+            self.shutdown_server_on_host(server_id, host_a)
+            self.evacuate_server(server_id)
+
+        # after evacuation, the server stays stopped at the destination
+        self.assertNotEqual(self.get_host_for_server(server_id), host_a)
+        server = self.os_admin.servers_client.show_server(server_id)['server']
+        self.assertEqual(server['status'], 'SHUTOFF')
+
+    def test_evacuate_with_target_host(self):
+        server_id = self.create_test_server(wait_until="ACTIVE")['id']
+        host_a = self.get_host_for_server(server_id)
+        host_b = self.get_host_other_than(server_id)
+
+        host_a_svc = clients.NovaServiceManager(
+            host_a, 'nova-compute', self.os_admin.services_client)
+
+        with host_a_svc.stopped():
+            self.shutdown_server_on_host(server_id, host_a)
+            # pass the target host explicitly
+            self.evacuate_server(server_id, host=host_b)
+
+        self.assertEqual(self.get_host_for_server(server_id), host_b)
+        server = self.os_admin.servers_client.show_server(server_id)['server']
+        self.assertEqual(server['status'], 'SHUTOFF')
+
+    def test_evacuate_attached_vol(self):
+        server = self.create_test_server(wait_until="ACTIVE")
+        server_id = server['id']
+        volume = self.create_volume()
+        vol_id = volume['id']
+
+        # Attach the volume
+        attachment = self.attach_volume(server, volume)
+        waiters.wait_for_volume_resource_status(
+            self.volumes_client, attachment['volumeId'], 'in-use')
+
+        host_a = self.get_host_for_server(server_id)
+
+        server = self.os_admin.servers_client.show_server(server_id)['server']
+        # verify the volume id before evacuation
+        self.assertEqual(
+            server['os-extended-volumes:volumes_attached'][0]['id'], vol_id)
+
+        host_a_svc = clients.NovaServiceManager(
+            host_a, 'nova-compute', self.os_admin.services_client)
+
+        with host_a_svc.stopped():
+            self.shutdown_server_on_host(server_id, host_a)
+            self.evacuate_server(server_id)
+
+        self.assertNotEqual(self.get_host_for_server(server_id), host_a)
+        server = self.os_admin.servers_client.show_server(server_id)['server']
+        self.assertEqual(server['status'], 'SHUTOFF')
+        # the evacuated server should still have the same volume attached
+        self.assertEqual(
+            server['os-extended-volumes:volumes_attached'][0]['id'], vol_id)
+
+    def test_evacuate_bfv_server(self):
+        server = self.create_test_server(
+            volume_backed=True, wait_until="ACTIVE", name="bfv-server")
+        server_id = server['id']
+        host_a = self.get_host_for_server(server_id)
+
+        host_a_svc = clients.NovaServiceManager(
+            host_a, 'nova-compute', self.os_admin.services_client)
+
+        server = self.os_admin.servers_client.show_server(server_id)['server']
+        vol_id = server['os-extended-volumes:volumes_attached'][0]['id']
+
+        with host_a_svc.stopped():
+            self.shutdown_server_on_host(server_id, host_a)
+            self.evacuate_server(server_id)
+
+        self.assertNotEqual(self.get_host_for_server(server_id), host_a)
+        server = self.os_admin.servers_client.show_server(server_id)['server']
+        self.assertEqual(server['status'], 'SHUTOFF')
+        self.assertEqual(
+            server['os-extended-volumes:volumes_attached'][0]['id'], vol_id)
+
+
+class ServerEvacuationV294(base.BaseWhiteboxComputeTest):
+    '''Test server evacuation for microversion 2.94.
+    '''
+
+    min_microversion = '2.94'
+
+    @classmethod
+    def skip_checks(cls):
+        super(ServerEvacuationV294, cls).skip_checks()
+        if CONF.compute.min_compute_nodes < 2:
+            msg = "Need two or more compute nodes to execute evacuate"
+            raise cls.skipException(msg)
+
+    def test_evacuate_to_active(self):
+        server_id = self.create_test_server(wait_until="ACTIVE")['id']
+        host_a = self.get_host_for_server(server_id)
+
+        # set the compute service down on host-A
+        host_a_svc = clients.NovaServiceManager(
+            host_a, 'nova-compute', self.os_admin.services_client)
+
+        with host_a_svc.stopped():
+            # as the compute service is down on the source host,
+            # shut down the server using virsh
+            self.shutdown_server_on_host(server_id, host_a)
+            self.evacuate_server(server_id)
+
+        # after evacuation, the server is started automatically at the
+        # destination
+        self.assertNotEqual(self.get_host_for_server(server_id), host_a)
+        server = self.os_admin.servers_client.show_server(server_id)['server']
+        self.assertEqual(server['status'], 'ACTIVE')
diff --git a/whitebox_tempest_plugin/api/compute/test_vdpa.py b/whitebox_tempest_plugin/api/compute/test_vdpa.py
index 700dc6e7..44fd0ac8 100644
--- a/whitebox_tempest_plugin/api/compute/test_vdpa.py
+++ b/whitebox_tempest_plugin/api/compute/test_vdpa.py
@@ -15,8 +15,8 @@

 from tempest.common import waiters
 from tempest import config
-
 from whitebox_tempest_plugin.api.compute import base
+from whitebox_tempest_plugin.services import clients

 from oslo_log import log as logging

@@ -208,3 +208,43 @@ class VDPAResizeInstance(VDPASmokeTests):
             server['id'],
             port['port']['id']
         )
+
+
+class VDPAEvacuateInstance(VDPASmokeTests):
+
+    min_microversion = '2.95'
+
+    @classmethod
+    def skip_checks(cls):
+        super(VDPAEvacuateInstance, cls).skip_checks()
+        if CONF.compute.min_compute_nodes < 2:
+            msg = "Need two or more compute nodes to execute evacuate."
+            raise cls.skipException(msg)
+
+    def test_evacuate_server_vdpa(self):
+        # Create an instance with a vDPA port and evacuate it
+        port = self._create_port_from_vnic_type(
+            net=self.network,
+            vnic_type='vdpa'
+        )
+        server = self.create_test_server(
+            networks=[{'port': port['port']['id']}],
+            wait_until='ACTIVE'
+        )
+        server_id = server['id']
+        host_a = self.get_host_for_server(server_id)
+
+        host_a_svc = clients.NovaServiceManager(
+            host_a, 'nova-compute', self.os_admin.services_client)
+
+        with host_a_svc.stopped():
+            self.shutdown_server_on_host(server_id, host_a)
+            self.evacuate_server(server_id)
+
+        self.assertNotEqual(self.get_host_for_server(server_id), host_a)
+        # Confirm the server's vDPA port binding is still correct after
+        # the evacuation
+        self._verify_neutron_port_binding(
+            server_id,
+            port['port']['id']
+        )
diff --git a/whitebox_tempest_plugin/api/compute/test_vgpu.py b/whitebox_tempest_plugin/api/compute/test_vgpu.py
index 25bcae0f..8835dd4e 100644
--- a/whitebox_tempest_plugin/api/compute/test_vgpu.py
+++ b/whitebox_tempest_plugin/api/compute/test_vgpu.py
@@ -17,6 +17,7 @@ from tempest.common.utils.linux import remote_client
 from tempest.common import waiters
 from tempest import config
 from tempest.lib.common.utils import data_utils
+from tempest.lib import decorators

 from whitebox_tempest_plugin.api.compute import base
 from whitebox_tempest_plugin.services import clients
@@ -609,3 +610,80 @@ class VGPUMultiTypes(VGPUTest):
             expected_mdev_type, found_mdev_type,
             "The found mdev_type %s does not match the expected mdev_type "
             "%s for %s" % (found_mdev_type, expected_mdev_type, trait))
+
+
+@decorators.skip_because(bug='1874664')
+class VGPUServerEvacuation(VGPUTest):
+
+    min_microversion = '2.95'
+
+    @classmethod
+    def skip_checks(cls):
+        super(VGPUServerEvacuation, cls).skip_checks()
+        if CONF.compute.min_compute_nodes < 2:
+            msg = "Need two or more compute nodes to execute evacuate."
+            raise cls.skipException(msg)
+
+    def test_evacuate_server_having_vgpu(self):
+        starting_rp_vgpu_inventory = \
+            self._get_vgpu_inventories_for_deployment()
+        validation_resources = self.get_test_validation_resources(
+            self.os_primary)
+        server = self.create_validateable_instance(
+            self.vgpu_flavor, validation_resources)
+        linux_client = self._create_ssh_client(server, validation_resources)
+
+        # Determine the host the vGPU enabled guest is currently on. Next,
+        # get another potential compute host to serve as the evacuation
+        # target
+        src_host = self.get_host_for_server(server['id'])
+        dest_host = self.get_host_other_than(server['id'])
+
+        # Get the current VGPU usage from the resource providers on the
+        # source and destination compute hosts
+        pre_src_usage = self._get_vgpu_util_for_host(src_host)
+        pre_dest_usage = self._get_vgpu_util_for_host(dest_host)
+
+        src_host_svc = clients.NovaServiceManager(
+            src_host, 'nova-compute', self.os_admin.services_client)
+
+        with src_host_svc.stopped():
+            self.shutdown_server_on_host(server['id'], src_host)
+            self.evacuate_server(server['id'])
+
+        self.assertEqual(self.get_host_for_server(server['id']), dest_host)
+
+        LOG.info('Guest %(server)s was just evacuated to %(dest_host)s, '
+                 'guest will now be validated after the operation',
+                 {'server': server['id'], 'dest_host': dest_host})
+        self._validate_vgpu_instance(
+            server,
+            linux_client=linux_client,
+            expected_device_count=self.vgpu_amount_per_instance)
+
+        # Regather the VGPU resource usage on both compute hosts involved.
+        # Confirm the original source host's VGPU usage no longer reports
+        # usage for the evacuated guest, and that the destination is now
+        # accounting for the resource
+        post_src_usage = self._get_vgpu_util_for_host(src_host)
+        post_dest_usage = self._get_vgpu_util_for_host(dest_host)
+        expected_src_usage = pre_src_usage - self.vgpu_amount_per_instance
+        self.assertEqual(
+            expected_src_usage, post_src_usage,
+            'After evacuation, host %s expected to have %d usage for '
+            'resource class VGPU but instead found %d' %
+            (src_host, expected_src_usage, post_src_usage))
+        expected_dest_usage = pre_dest_usage + self.vgpu_amount_per_instance
+        self.assertEqual(
+            expected_dest_usage, post_dest_usage,
+            'After evacuation, host %s expected to have resource class '
+            'VGPU usage totaling %d but instead found %d' %
+            (dest_host, expected_dest_usage, post_dest_usage))
+
+        # Delete the guest and confirm the inventory reverts to the
+        # original amount
+        self.os_admin.servers_client.delete_server(server['id'])
+        waiters.wait_for_server_termination(
+            self.os_admin.servers_client, server['id'])
+        end_rp_vgpu_inventory = self._get_vgpu_inventories_for_deployment()
+        self._validate_final_vgpu_rp_inventory(
+            starting_rp_vgpu_inventory, end_rp_vgpu_inventory)
diff --git a/whitebox_tempest_plugin/common/waiters.py b/whitebox_tempest_plugin/common/waiters.py
index ac216138..017ea787 100644
--- a/whitebox_tempest_plugin/common/waiters.py
+++ b/whitebox_tempest_plugin/common/waiters.py
@@ -16,6 +16,7 @@ import time

 from tempest.lib import exceptions as lib_exc
+from whitebox_tempest_plugin.exceptions import MigrationException


 def wait_for_nova_service_state(client, host, binary, state):
@@ -33,3 +34,22 @@ def wait_for_nova_service_state(client, host, binary, state):
             'Service %s on host %s failed to reach state %s within '
             'the required time (%s s)', binary, host, state, timeout)
     service = client.list_services(host=host, binary=binary)['services'][0]
+
+
+def wait_for_server_migration_complete(os_admin, server_id):
+    start_time = int(time.time())
+    timeout = os_admin.services_client.build_timeout
+    interval = os_admin.services_client.build_interval + 1
+    while int(time.time()) - start_time <= timeout:
+        # Only consider migration records belonging to the server being
+        # evacuated, rather than the unfiltered deployment-wide list
+        s_migs = os_admin.migrations_client.list_migrations(
+            instance_uuid=server_id)['migrations']
+        if s_migs and s_migs[-1]['status'] in ['done', 'completed']:
+            break
+        elif s_migs and s_migs[-1]['status'] in ['error', 'failed']:
+            raise MigrationException(
+                msg='evacuation failed because the server migration failed')
+        time.sleep(interval)
+    else:
+        # raise a Timeout exception if the migration never completed
+        raise lib_exc.TimeoutException(
+            'Evacuation failed because the server migration did not '
+            'complete within the required time (%s s)' % timeout)
diff --git a/whitebox_tempest_plugin/exceptions.py b/whitebox_tempest_plugin/exceptions.py
index c54d6b2a..1991796d 100644
--- a/whitebox_tempest_plugin/exceptions.py
+++ b/whitebox_tempest_plugin/exceptions.py
@@ -26,3 +26,7 @@ class MissingServiceSectionException(exceptions.TempestException):

 class InvalidCPUSpec(exceptions.TempestException):
     message = "CPU spec is invalid: %(spec)s."
+
+
+class MigrationException(exceptions.TempestException):
+    message = "Migration failed: %(msg)s."
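
Usage note: the pieces above are intended to compose as in the minimal
sketch below. The test class and test name here are hypothetical and shown
only for illustration; shutdown_server_on_host(), evacuate_server(),
NovaServiceManager.stopped(), and the SHUTOFF-after-evacuate behaviour at
microversion 2.95 all come from this patch and the tests it adds.

    from whitebox_tempest_plugin.api.compute import base
    from whitebox_tempest_plugin.services import clients


    class ExampleEvacuation(base.BaseWhiteboxComputeTest):

        min_microversion = '2.95'

        def test_example_evacuation(self):
            server_id = self.create_test_server(wait_until='ACTIVE')['id']
            source = self.get_host_for_server(server_id)

            # Simulate a dead compute host: stop nova-compute there, then
            # stop the guest directly via virsh, since the stopped service
            # can no longer do it through the API
            service = clients.NovaServiceManager(
                source, 'nova-compute', self.os_admin.services_client)
            with service.stopped():
                self.shutdown_server_on_host(server_id, source)
                # evacuate_server() blocks until the server's migration
                # record reaches 'done'/'completed'
                # (wait_for_server_migration_complete)
                self.evacuate_server(server_id)

            # With microversion >= 2.95 the server lands SHUTOFF on a
            # different host
            self.assertNotEqual(self.get_host_for_server(server_id), source)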