Add VDPA support for suspend and live migrate

This change appends vnic-type vdpa to the list
of passthrough vnic types and removes the API blocks.

This should enable the existing suspend and live migrate
code to properly manage vDPA interfaces, enabling
"hot plug" live migration similar to direct SR-IOV.

Implements: blueprint vdpa-suspend-detach-and-live-migrate
Change-Id: I878a9609ce0d84f7e3c2fef99e369b34d627a0df
Sean Mooney 2022-05-07 13:49:18 +03:00
parent 51a970af37
commit 0aad338b1c
8 changed files with 118 additions and 24 deletions

View File

@ -7,8 +7,8 @@ Using ports vnic_type='vdpa'
.. versionadded:: 26.0.0 (Zed)
Added support for most instance move operations (except live migration),
and the interface attach/detach operations.
Added support for all instance move operations,
and the interface attach/detach and suspend/resume operations.
.. important::
The functionality described below is only supported by the
@ -76,3 +76,11 @@ in neutron and passed into nova as part of the server create request.
openstack port create --network <my network> --vnic-type vdpa vdpa-port
openstack server create --flavor <my-flavor> --image <my-image> --port <vdpa-port uuid> vdpa-vm
vDPA live migration
~~~~~~~~~~~~~~~~~~~
At this time QEMU and the ``vhost-vdpa`` kernel module do not support transparent
live migration of VMs with vDPA ports. To enable live migration of VMs with
vDPA interfaces, the existing SR-IOV hotplug live migration procedure has been
extended to include ``vnic_type='vdpa'`` interfaces.
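As an illustration only (not part of this change), once all compute services are
upgraded, a VM created with a vDPA port can be live migrated through the normal
compute API; for example with openstacksdk, where the cloud name and server name
below are placeholder assumptions:

.. code-block:: python

   import openstack

   # Connect using clouds.yaml credentials; "mycloud" is a placeholder.
   conn = openstack.connect(cloud='mycloud')

   # Look up the server that was created with the vDPA port.
   server = conn.compute.find_server('vdpa-vm')

   # Request a live migration and let the scheduler pick the destination host.
   conn.compute.live_migrate_server(
       server, host=None, block_migration='auto')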

View File

@ -119,6 +119,8 @@ MIN_COMPUTE_INT_ATTACH_WITH_EXTENDED_RES_REQ = 60
SUPPORT_VNIC_TYPE_REMOTE_MANAGED = 61
MIN_COMPUTE_VDPA_ATTACH_DETACH = 62
MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION = 63
# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
@ -4657,11 +4659,10 @@ class API:
return self.compute_rpcapi.get_instance_diagnostics(context,
instance=instance)
# FIXME(sean-k-mooney): Suspend does not work because we do not unplug
# the vDPA devices before calling managed save as we do with SR-IOV
# devices
@block_port_accelerators()
@reject_vdpa_instances(instance_actions.SUSPEND)
@reject_vdpa_instances(
instance_actions.SUSPEND, until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION
)
@block_accelerators()
@reject_sev_instances(instance_actions.SUSPEND)
@check_instance_lock
@ -4674,6 +4675,9 @@ class API:
self.compute_rpcapi.suspend_instance(context, instance)
@check_instance_lock
@reject_vdpa_instances(
instance_actions.RESUME, until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION
)
@check_instance_state(vm_state=[vm_states.SUSPENDED])
def resume(self, context, instance):
"""Resume the given instance."""
@ -5410,7 +5414,6 @@ class API:
)
def detach_interface(self, context, instance, port_id):
"""Detach an network adapter from an instance."""
for vif in instance.get_network_info():
if vif['id'] == port_id:
if vif['vnic_type'] in (
@ -5462,7 +5465,10 @@ class API:
@block_extended_resource_request
@block_port_accelerators()
@reject_vdpa_instances(instance_actions.LIVE_MIGRATION)
@reject_vdpa_instances(
instance_actions.LIVE_MIGRATION,
until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION
)
@block_accelerators()
@reject_vtpm_instances(instance_actions.LIVE_MIGRATION)
@reject_sev_instances(instance_actions.LIVE_MIGRATION)
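For context, the new ``until=`` argument turns ``reject_vdpa_instances`` from an
unconditional block into a service-version gate: the operation is rejected only
while the deployment-wide minimum compute service version is below the given
level. The following is a minimal sketch of how such a gate can be written; it is
not the actual Nova decorator, and the helper name and exception arguments are
simplified assumptions:

import functools

from nova import exception
from nova.network import model as network_model
from nova import objects


def reject_vdpa_instances_sketch(operation, until=None):
    """Reject an operation for instances with vDPA ports, optionally only
    while the deployment is below a minimum compute service version.
    """
    def outer(func):
        @functools.wraps(func)
        def inner(self, context, instance, *args, **kwargs):
            has_vdpa = any(
                vif['vnic_type'] == network_model.VNIC_TYPE_VDPA
                for vif in instance.get_network_info())
            if has_vdpa:
                # With until=N the rejection only applies while at least
                # one compute service is still older than version N.
                if until is None or objects.service.get_minimum_version_all_cells(
                        context, ['nova-compute']) < until:
                    raise exception.OperationNotSupportedForVDPAInterface(
                        instance_uuid=instance.uuid, operation=operation)
            return func(self, context, instance, *args, **kwargs)
        return inner
    return outer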

View File

@ -122,20 +122,20 @@ VNIC_TYPE_REMOTE_MANAGED = "remote-managed"
# selected compute node.
VNIC_TYPES_SRIOV = (
VNIC_TYPE_DIRECT, VNIC_TYPE_MACVTAP, VNIC_TYPE_DIRECT_PHYSICAL,
VNIC_TYPE_VIRTIO_FORWARDER, VNIC_TYPE_VDPA, VNIC_TYPE_REMOTE_MANAGED)
VNIC_TYPE_VIRTIO_FORWARDER, VNIC_TYPE_VDPA, VNIC_TYPE_REMOTE_MANAGED
)
# Define list of ports which are passthrough to the guest
# and need special treatment on snapshot and suspend/resume
VNIC_TYPES_DIRECT_PASSTHROUGH = (VNIC_TYPE_DIRECT,
VNIC_TYPE_DIRECT_PHYSICAL,
VNIC_TYPE_ACCELERATOR_DIRECT,
VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL,
VNIC_TYPE_REMOTE_MANAGED)
VNIC_TYPES_DIRECT_PASSTHROUGH = (
VNIC_TYPE_DIRECT, VNIC_TYPE_DIRECT_PHYSICAL,
VNIC_TYPE_ACCELERATOR_DIRECT, VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL,
VNIC_TYPE_REMOTE_MANAGED, VNIC_TYPE_VDPA
)
# Define list of ports which contain devices managed by cyborg.
VNIC_TYPES_ACCELERATOR = (
VNIC_TYPE_ACCELERATOR_DIRECT,
VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL
VNIC_TYPE_ACCELERATOR_DIRECT, VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL
)
# Constants for the 'vif_model' values
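The practical effect of adding ``VNIC_TYPE_VDPA`` to ``VNIC_TYPES_DIRECT_PASSTHROUGH``
is that vDPA VIFs are now selected by the generic "unplug before the operation,
replug afterwards" handling applied to direct passthrough ports. A rough
illustration of how such a filter is typically applied (hypothetical helper, not
code from this change):

def direct_passthrough_vifs(instance):
    # Select the VIFs that must be detached before suspend or live
    # migration and re-attached afterwards.
    return [
        vif for vif in instance.get_network_info()
        if vif['vnic_type'] in VNIC_TYPES_DIRECT_PASSTHROUGH
    ]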

View File

@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 62
SERVICE_VERSION = 63
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@ -219,6 +219,9 @@ SERVICE_VERSION_HISTORY = (
# Version 62: Compute RPC v6.0:
# Add support for VDPA port attach/detach
{'compute_rpc': '6.0'},
# Version 63: Compute RPC v6.0:
# Add support for VDPA hotplug live migration and suspend/resume
{'compute_rpc': '6.0'},
)
# This is used to raise an error at service startup if older than N-1 computes

View File

@ -561,8 +561,8 @@ class InstanceHelperMixin:
self.api.post_server_action(
server['id'],
{'os-migrateLive': {'host': None, 'block_migration': 'auto'}})
self._wait_for_state_change(server, server_expected_state)
self._wait_for_migration_status(server, [migration_expected_state])
return self._wait_for_state_change(server, server_expected_state)
_live_migrate_server = _live_migrate

View File

@ -1062,7 +1062,7 @@ class SRIOVAttachDetachTest(_PCIServersTestBase):
self.neutron.sriov_pf_port2['id'])
class VDPAServersTest(_PCIServersTestBase):
class VDPAServersTest(_PCIServersWithMigrationTestBase):
# this is needed for os_compute_api:os-migrate-server:migrate policy
ADMIN_API = True
@ -1094,7 +1094,6 @@ class VDPAServersTest(_PCIServersTestBase):
def setUp(self):
super().setUp()
# The ultimate base class _IntegratedTestBase uses NeutronFixture but
# we need a bit more intelligent neutron for these tests. Applying the
# new fixture here means that we re-stub what the previous neutron
@ -1180,7 +1179,6 @@ class VDPAServersTest(_PCIServersTestBase):
expected = """
<interface type="vdpa">
<mac address="b5:bc:2e:e7:51:ee"/>
<model type="virtio"/>
<source dev="/dev/vhost-vdpa-3"/>
</interface>"""
actual = etree.tostring(elem, encoding='unicode')
@ -1568,8 +1566,62 @@ class VDPAServersTest(_PCIServersTestBase):
self.assertEqual(
dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname'])
def test_suspend(self):
self._test_common(self._suspend_server)
def test_suspend_and_resume_service_version_62(self):
with mock.patch(
"nova.objects.service.get_minimum_version_all_cells",
return_value=62
):
self._test_common(self._suspend_server)
def test_suspend_and_resume(self):
source = self.start_vdpa_compute(hostname='source')
vdpa_port, server = self._create_port_and_server()
num_pci = self.NUM_PFS + self.NUM_VFS
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2)
server = self._suspend_server(server)
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2)
self.assertEqual('SUSPENDED', server['status'])
server = self._resume_server(server)
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2)
self.assertEqual('ACTIVE', server['status'])
def test_live_migrate_service_version_62(self):
with mock.patch(
"nova.objects.service.get_minimum_version_all_cells",
return_value=62
):
self._test_common(self._live_migrate)
def test_live_migrate(self):
source = self.start_vdpa_compute(hostname='source')
dest = self.start_vdpa_compute(hostname='dest')
num_pci = self.NUM_PFS + self.NUM_VFS
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci)
self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci)
# ensure we boot the vm on the "source" compute
self.api.put_service(
self.computes['dest'].service_ref.uuid, {'status': 'disabled'})
vdpa_port, server = self._create_port_and_server()
self.assertEqual(
source, server['OS-EXT-SRV-ATTR:hypervisor_hostname'])
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2)
# enable the dest; we do not need to disable the source since cold
# migrate won't happen to the same host in the libvirt driver
self.api.put_service(
self.computes['dest'].service_ref.uuid, {'status': 'enabled'})
with mock.patch(
'nova.virt.libvirt.LibvirtDriver.'
'_detach_direct_passthrough_vifs'
):
server = self._live_migrate(server)
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci)
self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2)
self.assertEqual(
dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname'])
class PCIServersTest(_PCIServersTestBase):

View File

@ -71,6 +71,6 @@ class TestRollbackWithHWOffloadedOVS(
# Live migrate the instance to another host
self._live_migrate(self.server,
migration_expected_state='failed',
server_expected_state='MIGRATING')
server_expected_state='ACTIVE')
mlpr.assert_not_called()
mlpp.assert_called_once()

View File

@ -0,0 +1,25 @@
---
features:
- |
vDPA support was first introduced in the 23.0.0 (Wallaby)
release with limited instance lifecycle operations. Nova now supports
all instance lifecycle operations, including suspend, attach/detach
and hot-plug live migration.
QEMU and the Linux kernel do not currently support transparent
live migration of vDPA devices. Hot-plug live migration
unplugs the vDPA device on the source host before the VM is live migrated
and automatically hot-plugs the device on the destination after the
migration. While this can lead to packet loss, it enables live migration
to be used when needed until transparent live migration can be added
in a future release.
vDPA hot-plug live migration requires all compute services to be upgraded
to service level 63. Similarly, suspend/resume needs service
level 63, and attach/detach requires service level 62.
As such, these operations will not be available during a rolling upgrade but will
become available once all hosts are upgraded to the 26.0.0 (Zed) release.
With the addition of these features, all instance lifecycle operations are
now valid for VMs with vDPA neutron ports.