From 0aad338b1c68f319df603bca340ff33dc7fd7b54 Mon Sep 17 00:00:00 2001 From: Sean Mooney Date: Sat, 7 May 2022 13:49:18 +0300 Subject: [PATCH] Add VDPA support for suspend and livemigrate This change appends vnic-type vdpa to the list of passthrough vnic types and removes the API blocks. This should enable the existing suspend and live migrate code to properly manage vdpa interfaces enabling "hot plug" live migrations similar to direct sr-iov. Implements: blueprint vdpa-suspend-detach-and-live-migrate Change-Id: I878a9609ce0d84f7e3c2fef99e369b34d627a0df --- doc/source/admin/vdpa.rst | 12 +++- nova/compute/api.py | 18 ++++-- nova/network/model.py | 16 ++--- nova/objects/service.py | 5 +- nova/tests/functional/integrated_helpers.py | 2 +- .../libvirt/test_pci_sriov_servers.py | 62 +++++++++++++++++-- .../regressions/test_bug_1944619.py | 2 +- ...ach-and-live-migrate-e591e6a03a0c834d.yaml | 25 ++++++++ 8 files changed, 118 insertions(+), 24 deletions(-) create mode 100644 releasenotes/notes/vdpa-suspend-detach-and-live-migrate-e591e6a03a0c834d.yaml diff --git a/doc/source/admin/vdpa.rst b/doc/source/admin/vdpa.rst index d293cda93ff1..5d0408b0b362 100644 --- a/doc/source/admin/vdpa.rst +++ b/doc/source/admin/vdpa.rst @@ -7,8 +7,8 @@ Using ports vnic_type='vdpa' .. versionadded:: 26.0.0 (Zed) - Added support for most instance move operations (except live migration), - and the interface attach/detach operations. + Added support for all instance move operations, + and the interface attach/detach, and suspend/resume operations. .. important:: The functionality described below is only supported by the @@ -76,3 +76,11 @@ in neutron and passed into nova as part of the server create request. openstack port create --network --vnic-type vdpa vdpa-port openstack server create --flavor --image --port vdpa-vm + +vDPA live migration +~~~~~~~~~~~~~~~~~~~ + +At this time QEMU and the ``vhost-vdpa`` kernel module do not support transparent +live migration of VMs with vDPA ports. 
To enable live migration of VMs with +vDPA interfaces the existing SR-IOV hotplug live migration procedure has been +extended to include ``vnic_type='vdpa'`` interfaces. diff --git a/nova/compute/api.py b/nova/compute/api.py index d0d7a0c5ac77..9fc4ca24a315 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -119,6 +119,8 @@ MIN_COMPUTE_INT_ATTACH_WITH_EXTENDED_RES_REQ = 60 SUPPORT_VNIC_TYPE_REMOTE_MANAGED = 61 MIN_COMPUTE_VDPA_ATTACH_DETACH = 62 +MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION = 63 + # FIXME(danms): Keep a global cache of the cells we find the # first time we look. This needs to be refreshed on a timer or @@ -4657,11 +4659,10 @@ class API: return self.compute_rpcapi.get_instance_diagnostics(context, instance=instance) - # FIXME(sean-k-mooney): Suspend does not work because we do not unplug - # the vDPA devices before calling managed save as we do with SR-IOV - # devices @block_port_accelerators() - @reject_vdpa_instances(instance_actions.SUSPEND) + @reject_vdpa_instances( + instance_actions.SUSPEND, until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION + ) @block_accelerators() @reject_sev_instances(instance_actions.SUSPEND) @check_instance_lock @@ -4674,6 +4675,9 @@ class API: self.compute_rpcapi.suspend_instance(context, instance) @check_instance_lock + @reject_vdpa_instances( + instance_actions.RESUME, until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION + ) @check_instance_state(vm_state=[vm_states.SUSPENDED]) def resume(self, context, instance): """Resume the given instance.""" @@ -5410,7 +5414,6 @@ class API: ) def detach_interface(self, context, instance, port_id): """Detach an network adapter from an instance.""" - for vif in instance.get_network_info(): if vif['id'] == port_id: if vif['vnic_type'] in ( @@ -5462,7 +5465,10 @@ class API: @block_extended_resource_request @block_port_accelerators() - @reject_vdpa_instances(instance_actions.LIVE_MIGRATION) + @reject_vdpa_instances( + instance_actions.LIVE_MIGRATION, + 
until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION + ) @block_accelerators() @reject_vtpm_instances(instance_actions.LIVE_MIGRATION) @reject_sev_instances(instance_actions.LIVE_MIGRATION) diff --git a/nova/network/model.py b/nova/network/model.py index 5bd70837db55..1260349bcdba 100644 --- a/nova/network/model.py +++ b/nova/network/model.py @@ -122,20 +122,20 @@ VNIC_TYPE_REMOTE_MANAGED = "remote-managed" # selected compute node. VNIC_TYPES_SRIOV = ( VNIC_TYPE_DIRECT, VNIC_TYPE_MACVTAP, VNIC_TYPE_DIRECT_PHYSICAL, - VNIC_TYPE_VIRTIO_FORWARDER, VNIC_TYPE_VDPA, VNIC_TYPE_REMOTE_MANAGED) + VNIC_TYPE_VIRTIO_FORWARDER, VNIC_TYPE_VDPA, VNIC_TYPE_REMOTE_MANAGED +) # Define list of ports which are passthrough to the guest # and need a special treatment on snapshot and suspend/resume -VNIC_TYPES_DIRECT_PASSTHROUGH = (VNIC_TYPE_DIRECT, - VNIC_TYPE_DIRECT_PHYSICAL, - VNIC_TYPE_ACCELERATOR_DIRECT, - VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL, - VNIC_TYPE_REMOTE_MANAGED) +VNIC_TYPES_DIRECT_PASSTHROUGH = ( + VNIC_TYPE_DIRECT, VNIC_TYPE_DIRECT_PHYSICAL, + VNIC_TYPE_ACCELERATOR_DIRECT, VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL, + VNIC_TYPE_REMOTE_MANAGED, VNIC_TYPE_VDPA +) # Define list of ports which contains devices managed by cyborg. VNIC_TYPES_ACCELERATOR = ( - VNIC_TYPE_ACCELERATOR_DIRECT, - VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL + VNIC_TYPE_ACCELERATOR_DIRECT, VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL ) # Constants for the 'vif_model' values diff --git a/nova/objects/service.py b/nova/objects/service.py index e67ec17217c8..8885120ddd01 100644 --- a/nova/objects/service.py +++ b/nova/objects/service.py @@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__) # NOTE(danms): This is the global service version counter -SERVICE_VERSION = 62 +SERVICE_VERSION = 63 # NOTE(danms): This is our SERVICE_VERSION history. 
The idea is that any @@ -219,6 +219,9 @@ SERVICE_VERSION_HISTORY = ( # Version 62: Compute RPC v6.0: # Add support for VDPA port attach/detach {'compute_rpc': '6.0'}, + # Version 63: Compute RPC v6.0: + # Add support for VDPA hotplug live migration and suspend/resume + {'compute_rpc': '6.0'}, ) # This is used to raise an error at service startup if older than N-1 computes diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py index 623c4f0ef705..fa80c860ef8e 100644 --- a/nova/tests/functional/integrated_helpers.py +++ b/nova/tests/functional/integrated_helpers.py @@ -561,8 +561,8 @@ class InstanceHelperMixin: self.api.post_server_action( server['id'], {'os-migrateLive': {'host': None, 'block_migration': 'auto'}}) - self._wait_for_state_change(server, server_expected_state) self._wait_for_migration_status(server, [migration_expected_state]) + return self._wait_for_state_change(server, server_expected_state) _live_migrate_server = _live_migrate diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index a38a0064b1d2..1f943ee2eecb 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -1062,7 +1062,7 @@ class SRIOVAttachDetachTest(_PCIServersTestBase): self.neutron.sriov_pf_port2['id']) -class VDPAServersTest(_PCIServersTestBase): +class VDPAServersTest(_PCIServersWithMigrationTestBase): # this is needed for os_compute_api:os-migrate-server:migrate policy ADMIN_API = True @@ -1094,7 +1094,6 @@ class VDPAServersTest(_PCIServersTestBase): def setUp(self): super().setUp() - # The ultimate base class _IntegratedTestBase uses NeutronFixture but # we need a bit more intelligent neutron for these tests. 
Applying the # new fixture here means that we re-stub what the previous neutron @@ -1180,7 +1179,6 @@ class VDPAServersTest(_PCIServersTestBase): expected = """ - """ actual = etree.tostring(elem, encoding='unicode') @@ -1568,8 +1566,62 @@ class VDPAServersTest(_PCIServersTestBase): self.assertEqual( dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) - def test_suspend(self): - self._test_common(self._suspend_server) + def test_suspend_and_resume_service_version_62(self): + with mock.patch( + "nova.objects.service.get_minimum_version_all_cells", + return_value=62 + ): + self._test_common(self._suspend_server) + + def test_suspend_and_resume(self): + source = self.start_vdpa_compute(hostname='source') + vdpa_port, server = self._create_port_and_server() + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + server = self._suspend_server(server) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertEqual('SUSPENDED', server['status']) + server = self._resume_server(server) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertEqual('ACTIVE', server['status']) + + def test_live_migrate_service_version_62(self): + with mock.patch( + "nova.objects.service.get_minimum_version_all_cells", + return_value=62 + ): + self._test_common(self._live_migrate) + + def test_live_migrate(self): + source = self.start_vdpa_compute(hostname='source') + dest = self.start_vdpa_compute(hostname='dest') + + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + + # ensure we boot the vm on the "source" compute + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'disabled'}) + vdpa_port, server = self._create_port_and_server() + self.assertEqual( + source, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + + 
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + # enable the dest; we do not need to disable the source since a live + # migration will not select the source host as its destination + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'enabled'}) + + with mock.patch( + 'nova.virt.libvirt.LibvirtDriver.' + '_detach_direct_passthrough_vifs' + ): + server = self._live_migrate(server) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + self.assertEqual( + dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) class PCIServersTest(_PCIServersTestBase): diff --git a/nova/tests/functional/regressions/test_bug_1944619.py b/nova/tests/functional/regressions/test_bug_1944619.py index bdd06c493ffa..430a6e39818b 100644 --- a/nova/tests/functional/regressions/test_bug_1944619.py +++ b/nova/tests/functional/regressions/test_bug_1944619.py @@ -71,6 +71,6 @@ class TestRollbackWithHWOffloadedOVS( # Live migrate the instance to another host self._live_migrate(self.server, migration_expected_state='failed', - server_expected_state='MIGRATING') + server_expected_state='ACTIVE') mlpr.assert_not_called() mlpp.assert_called_once() diff --git a/releasenotes/notes/vdpa-suspend-detach-and-live-migrate-e591e6a03a0c834d.yaml b/releasenotes/notes/vdpa-suspend-detach-and-live-migrate-e591e6a03a0c834d.yaml new file mode 100644 index 000000000000..45092b5a00ea --- /dev/null +++ b/releasenotes/notes/vdpa-suspend-detach-and-live-migrate-e591e6a03a0c834d.yaml @@ -0,0 +1,25 @@ +--- +features: + - | + vDPA support was first introduced in the 23.0.0 (Wallaby) + release with limited instance lifecycle operations. Nova now supports + all instance lifecycle operations including suspend, attach/detach + and hot-plug live migration. + + QEMU and the Linux kernel do not currently support transparent + live migration of vDPA devices. 
Hot-plug live migration + unplugs the VDPA device on the source host before the VM is live migrated + and automatically hot-plugs the device on the destination after the + migration. While this can lead to packet loss, it enables live migration + to be used when needed until transparent live migration can be added + in a future release. + + VDPA Hot-plug live migration requires all compute services to be upgraded + to service level 63 to be enabled. Similarly, suspend/resume needs service + level 63 and attach/detach requires service level 62. + As such it will not be available to use during a rolling upgrade but will + become available when all hosts are upgraded to the 26.0.0 (Zed) release. + + With the addition of these features, all instance lifecycle operations are + now valid for VMs with VDPA neutron ports. +