diff --git a/doc/source/admin/vdpa.rst b/doc/source/admin/vdpa.rst index d293cda93ff1..5d0408b0b362 100644 --- a/doc/source/admin/vdpa.rst +++ b/doc/source/admin/vdpa.rst @@ -7,8 +7,8 @@ Using ports vnic_type='vdpa' .. versionadded:: 26.0.0 (Zed) - Added support for most instance move operations (except live migration), - and the interface attach/detach operations. + Added support for all instance move operations, + and the interface attach/detach, and suspend/resume operations. .. important:: The functionality described below is only supported by the @@ -76,3 +76,11 @@ in neutron and passed into nova as part of the server create request. openstack port create --network --vnic-type vdpa vdpa-port openstack server create --flavor --image --port vdpa-vm + +vDPA live migration +~~~~~~~~~~~~~~~~~~~ + +At this time QEMU and the ``vhost-vdpa`` kernel module do not support transparent +live migration of vm with vdpa ports. To enable live migration of VMs with +vDPA interfaces the existing SR-IOV hotplug live migration procedure has been +extended to include ``vnic_type='vdpa'`` interfaces. diff --git a/nova/compute/api.py b/nova/compute/api.py index d0d7a0c5ac77..9fc4ca24a315 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -119,6 +119,8 @@ MIN_COMPUTE_INT_ATTACH_WITH_EXTENDED_RES_REQ = 60 SUPPORT_VNIC_TYPE_REMOTE_MANAGED = 61 MIN_COMPUTE_VDPA_ATTACH_DETACH = 62 +MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION = 63 + # FIXME(danms): Keep a global cache of the cells we find the # first time we look. 
This needs to be refreshed on a timer or @@ -4657,11 +4659,10 @@ class API: return self.compute_rpcapi.get_instance_diagnostics(context, instance=instance) - # FIXME(sean-k-mooney): Suspend does not work because we do not unplug - # the vDPA devices before calling managed save as we do with SR-IOV - # devices @block_port_accelerators() - @reject_vdpa_instances(instance_actions.SUSPEND) + @reject_vdpa_instances( + instance_actions.SUSPEND, until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION + ) @block_accelerators() @reject_sev_instances(instance_actions.SUSPEND) @check_instance_lock @@ -4674,6 +4675,9 @@ class API: self.compute_rpcapi.suspend_instance(context, instance) @check_instance_lock + @reject_vdpa_instances( + instance_actions.RESUME, until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION + ) @check_instance_state(vm_state=[vm_states.SUSPENDED]) def resume(self, context, instance): """Resume the given instance.""" @@ -5410,7 +5414,6 @@ class API: ) def detach_interface(self, context, instance, port_id): """Detach an network adapter from an instance.""" - for vif in instance.get_network_info(): if vif['id'] == port_id: if vif['vnic_type'] in ( @@ -5462,7 +5465,10 @@ class API: @block_extended_resource_request @block_port_accelerators() - @reject_vdpa_instances(instance_actions.LIVE_MIGRATION) + @reject_vdpa_instances( + instance_actions.LIVE_MIGRATION, + until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION + ) @block_accelerators() @reject_vtpm_instances(instance_actions.LIVE_MIGRATION) @reject_sev_instances(instance_actions.LIVE_MIGRATION) diff --git a/nova/network/model.py b/nova/network/model.py index 5bd70837db55..1260349bcdba 100644 --- a/nova/network/model.py +++ b/nova/network/model.py @@ -122,20 +122,20 @@ VNIC_TYPE_REMOTE_MANAGED = "remote-managed" # selected compute node. 
VNIC_TYPES_SRIOV = ( VNIC_TYPE_DIRECT, VNIC_TYPE_MACVTAP, VNIC_TYPE_DIRECT_PHYSICAL, - VNIC_TYPE_VIRTIO_FORWARDER, VNIC_TYPE_VDPA, VNIC_TYPE_REMOTE_MANAGED) + VNIC_TYPE_VIRTIO_FORWARDER, VNIC_TYPE_VDPA, VNIC_TYPE_REMOTE_MANAGED +) # Define list of ports which are passthrough to the guest # and need a special treatment on snapshot and suspend/resume -VNIC_TYPES_DIRECT_PASSTHROUGH = (VNIC_TYPE_DIRECT, - VNIC_TYPE_DIRECT_PHYSICAL, - VNIC_TYPE_ACCELERATOR_DIRECT, - VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL, - VNIC_TYPE_REMOTE_MANAGED) +VNIC_TYPES_DIRECT_PASSTHROUGH = ( + VNIC_TYPE_DIRECT, VNIC_TYPE_DIRECT_PHYSICAL, + VNIC_TYPE_ACCELERATOR_DIRECT, VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL, + VNIC_TYPE_REMOTE_MANAGED, VNIC_TYPE_VDPA +) # Define list of ports which contains devices managed by cyborg. VNIC_TYPES_ACCELERATOR = ( - VNIC_TYPE_ACCELERATOR_DIRECT, - VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL + VNIC_TYPE_ACCELERATOR_DIRECT, VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL ) # Constants for the 'vif_model' values diff --git a/nova/objects/service.py b/nova/objects/service.py index e67ec17217c8..8885120ddd01 100644 --- a/nova/objects/service.py +++ b/nova/objects/service.py @@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__) # NOTE(danms): This is the global service version counter -SERVICE_VERSION = 62 +SERVICE_VERSION = 63 # NOTE(danms): This is our SERVICE_VERSION history. 
The idea is that any @@ -219,6 +219,9 @@ SERVICE_VERSION_HISTORY = ( # Version 62: Compute RPC v6.0: # Add support for VDPA port attach/detach {'compute_rpc': '6.0'}, + # Version 63: Compute RPC v6.0: + # Add support for VDPA hotplug live migration and suspend/resume + {'compute_rpc': '6.0'}, ) # This is used to raise an error at service startup if older than N-1 computes diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py index 623c4f0ef705..fa80c860ef8e 100644 --- a/nova/tests/functional/integrated_helpers.py +++ b/nova/tests/functional/integrated_helpers.py @@ -561,8 +561,8 @@ class InstanceHelperMixin: self.api.post_server_action( server['id'], {'os-migrateLive': {'host': None, 'block_migration': 'auto'}}) - self._wait_for_state_change(server, server_expected_state) self._wait_for_migration_status(server, [migration_expected_state]) + return self._wait_for_state_change(server, server_expected_state) _live_migrate_server = _live_migrate diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index a38a0064b1d2..1f943ee2eecb 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -1062,7 +1062,7 @@ class SRIOVAttachDetachTest(_PCIServersTestBase): self.neutron.sriov_pf_port2['id']) -class VDPAServersTest(_PCIServersTestBase): +class VDPAServersTest(_PCIServersWithMigrationTestBase): # this is needed for os_compute_api:os-migrate-server:migrate policy ADMIN_API = True @@ -1094,7 +1094,6 @@ class VDPAServersTest(_PCIServersTestBase): def setUp(self): super().setUp() - # The ultimate base class _IntegratedTestBase uses NeutronFixture but # we need a bit more intelligent neutron for these tests. 
Applying the # new fixture here means that we re-stub what the previous neutron @@ -1180,7 +1179,6 @@ class VDPAServersTest(_PCIServersTestBase): expected = """ - """ actual = etree.tostring(elem, encoding='unicode') @@ -1568,8 +1566,62 @@ class VDPAServersTest(_PCIServersTestBase): self.assertEqual( dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) - def test_suspend(self): - self._test_common(self._suspend_server) + def test_suspend_and_resume_service_version_62(self): + with mock.patch( + "nova.objects.service.get_minimum_version_all_cells", + return_value=62 + ): + self._test_common(self._suspend_server) + + def test_suspend_and_resume(self): + source = self.start_vdpa_compute(hostname='source') + vdpa_port, server = self._create_port_and_server() + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + server = self._suspend_server(server) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertEqual('SUSPENDED', server['status']) + server = self._resume_server(server) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertEqual('ACTIVE', server['status']) + + def test_live_migrate_service_version_62(self): + with mock.patch( + "nova.objects.service.get_minimum_version_all_cells", + return_value=62 + ): + self._test_common(self._live_migrate) + + def test_live_migrate(self): + source = self.start_vdpa_compute(hostname='source') + dest = self.start_vdpa_compute(hostname='dest') + + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + + # ensure we boot the vm on the "source" compute + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'disabled'}) + vdpa_port, server = self._create_port_and_server() + self.assertEqual( + source, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + + 
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + # enable the dest we do not need to disable the source since cold + # migrate wont happen to the same host in the libvirt driver + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'enabled'}) + + with mock.patch( + 'nova.virt.libvirt.LibvirtDriver.' + '_detach_direct_passthrough_vifs' + ): + server = self._live_migrate(server) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + self.assertEqual( + dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) class PCIServersTest(_PCIServersTestBase): diff --git a/nova/tests/functional/regressions/test_bug_1944619.py b/nova/tests/functional/regressions/test_bug_1944619.py index bdd06c493ffa..430a6e39818b 100644 --- a/nova/tests/functional/regressions/test_bug_1944619.py +++ b/nova/tests/functional/regressions/test_bug_1944619.py @@ -71,6 +71,6 @@ class TestRollbackWithHWOffloadedOVS( # Live migrate the instance to another host self._live_migrate(self.server, migration_expected_state='failed', - server_expected_state='MIGRATING') + server_expected_state='ACTIVE') mlpr.assert_not_called() mlpp.assert_called_once() diff --git a/releasenotes/notes/vdpa-suspend-detach-and-live-migrate-e591e6a03a0c834d.yaml b/releasenotes/notes/vdpa-suspend-detach-and-live-migrate-e591e6a03a0c834d.yaml new file mode 100644 index 000000000000..45092b5a00ea --- /dev/null +++ b/releasenotes/notes/vdpa-suspend-detach-and-live-migrate-e591e6a03a0c834d.yaml @@ -0,0 +1,25 @@ +--- +features: + - | + vDPA support was first introduced in the 23.0.0 (Wallaby) + release with limited instance lifecycle operations. Nova now supports + all instance lifecycle operations including suspend, attach/detach + and hot-plug live migration. + + QEMU and the Linux kernel do not currently support transparent + live migration of vDPA devices at this time. 
Hot-plug live migration + unplugs the vDPA device on the source host before the VM is live migrated + and automatically hot-plugs the device on the destination after the + migration. While this can lead to packet loss, it enables live migration + to be used when needed until transparent live migration can be added + in a future release. + + vDPA hot-plug live migration requires all compute services to be upgraded + to service level 63 to be enabled. Similarly, suspend/resume needs service + level 63 and attach/detach requires service level 62. + As such, these features will not be available during a rolling upgrade but will + become available when all hosts are upgraded to the 26.0.0 (Zed) release. + + With the addition of these features, all instance lifecycle operations are + now valid for VMs with vDPA neutron ports. +