Add VDPA support for suspend and livemigrate
This change appends vnic-type vdpa to the list of passthrough vnic types and removes the API blocks. This should enable the existing suspend and live migrate code to properly manage vDPA interfaces, enabling "hot plug" live migrations similar to direct SR-IOV. Implements: blueprint vdpa-suspend-detach-and-live-migrate Change-Id: I878a9609ce0d84f7e3c2fef99e369b34d627a0df
This commit is contained in:
parent
51a970af37
commit
0aad338b1c
@ -7,8 +7,8 @@ Using ports vnic_type='vdpa'
|
||||
|
||||
.. versionadded:: 26.0.0 (Zed)
|
||||
|
||||
Added support for most instance move operations (except live migration),
|
||||
and the interface attach/detach operations.
|
||||
Added support for all instance move operations,
|
||||
and the interface attach/detach, and suspend/resume operations.
|
||||
|
||||
.. important::
|
||||
The functionality described below is only supported by the
|
||||
@ -76,3 +76,11 @@ in neutron and passed into nova as part of the server create request.
|
||||
|
||||
openstack port create --network <my network> --vnic-type vdpa vdpa-port
|
||||
openstack server create --flavor <my-flavor> --image <my-image> --port <vdpa-port uuid> vdpa-vm
|
||||
|
||||
vDPA live migration
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
At this time QEMU and the ``vhost-vdpa`` kernel module do not support transparent
|
||||
live migration of VMs with vDPA ports. To enable live migration of VMs with
|
||||
vDPA interfaces the existing SR-IOV hotplug live migration procedure has been
|
||||
extended to include ``vnic_type='vdpa'`` interfaces.
|
||||
|
@ -119,6 +119,8 @@ MIN_COMPUTE_INT_ATTACH_WITH_EXTENDED_RES_REQ = 60
|
||||
|
||||
SUPPORT_VNIC_TYPE_REMOTE_MANAGED = 61
|
||||
MIN_COMPUTE_VDPA_ATTACH_DETACH = 62
|
||||
MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION = 63
|
||||
|
||||
|
||||
# FIXME(danms): Keep a global cache of the cells we find the
|
||||
# first time we look. This needs to be refreshed on a timer or
|
||||
@ -4657,11 +4659,10 @@ class API:
|
||||
return self.compute_rpcapi.get_instance_diagnostics(context,
|
||||
instance=instance)
|
||||
|
||||
# FIXME(sean-k-mooney): Suspend does not work because we do not unplug
|
||||
# the vDPA devices before calling managed save as we do with SR-IOV
|
||||
# devices
|
||||
@block_port_accelerators()
|
||||
@reject_vdpa_instances(instance_actions.SUSPEND)
|
||||
@reject_vdpa_instances(
|
||||
instance_actions.SUSPEND, until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION
|
||||
)
|
||||
@block_accelerators()
|
||||
@reject_sev_instances(instance_actions.SUSPEND)
|
||||
@check_instance_lock
|
||||
@ -4674,6 +4675,9 @@ class API:
|
||||
self.compute_rpcapi.suspend_instance(context, instance)
|
||||
|
||||
@check_instance_lock
|
||||
@reject_vdpa_instances(
|
||||
instance_actions.RESUME, until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION
|
||||
)
|
||||
@check_instance_state(vm_state=[vm_states.SUSPENDED])
|
||||
def resume(self, context, instance):
|
||||
"""Resume the given instance."""
|
||||
@ -5410,7 +5414,6 @@ class API:
|
||||
)
|
||||
def detach_interface(self, context, instance, port_id):
|
||||
"""Detach an network adapter from an instance."""
|
||||
|
||||
for vif in instance.get_network_info():
|
||||
if vif['id'] == port_id:
|
||||
if vif['vnic_type'] in (
|
||||
@ -5462,7 +5465,10 @@ class API:
|
||||
|
||||
@block_extended_resource_request
|
||||
@block_port_accelerators()
|
||||
@reject_vdpa_instances(instance_actions.LIVE_MIGRATION)
|
||||
@reject_vdpa_instances(
|
||||
instance_actions.LIVE_MIGRATION,
|
||||
until=MIN_COMPUTE_VDPA_HOTPLUG_LIVE_MIGRATION
|
||||
)
|
||||
@block_accelerators()
|
||||
@reject_vtpm_instances(instance_actions.LIVE_MIGRATION)
|
||||
@reject_sev_instances(instance_actions.LIVE_MIGRATION)
|
||||
|
@ -122,20 +122,20 @@ VNIC_TYPE_REMOTE_MANAGED = "remote-managed"
|
||||
# selected compute node.
|
||||
VNIC_TYPES_SRIOV = (
|
||||
VNIC_TYPE_DIRECT, VNIC_TYPE_MACVTAP, VNIC_TYPE_DIRECT_PHYSICAL,
|
||||
VNIC_TYPE_VIRTIO_FORWARDER, VNIC_TYPE_VDPA, VNIC_TYPE_REMOTE_MANAGED)
|
||||
VNIC_TYPE_VIRTIO_FORWARDER, VNIC_TYPE_VDPA, VNIC_TYPE_REMOTE_MANAGED
|
||||
)
|
||||
|
||||
# Define list of ports which are passthrough to the guest
|
||||
# and need a special treatment on snapshot and suspend/resume
|
||||
VNIC_TYPES_DIRECT_PASSTHROUGH = (VNIC_TYPE_DIRECT,
|
||||
VNIC_TYPE_DIRECT_PHYSICAL,
|
||||
VNIC_TYPE_ACCELERATOR_DIRECT,
|
||||
VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL,
|
||||
VNIC_TYPE_REMOTE_MANAGED)
|
||||
VNIC_TYPES_DIRECT_PASSTHROUGH = (
|
||||
VNIC_TYPE_DIRECT, VNIC_TYPE_DIRECT_PHYSICAL,
|
||||
VNIC_TYPE_ACCELERATOR_DIRECT, VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL,
|
||||
VNIC_TYPE_REMOTE_MANAGED, VNIC_TYPE_VDPA
|
||||
)
|
||||
|
||||
# Define list of ports which contains devices managed by cyborg.
|
||||
VNIC_TYPES_ACCELERATOR = (
|
||||
VNIC_TYPE_ACCELERATOR_DIRECT,
|
||||
VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL
|
||||
VNIC_TYPE_ACCELERATOR_DIRECT, VNIC_TYPE_ACCELERATOR_DIRECT_PHYSICAL
|
||||
)
|
||||
|
||||
# Constants for the 'vif_model' values
|
||||
|
@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# NOTE(danms): This is the global service version counter
|
||||
SERVICE_VERSION = 62
|
||||
SERVICE_VERSION = 63
|
||||
|
||||
|
||||
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
|
||||
@ -219,6 +219,9 @@ SERVICE_VERSION_HISTORY = (
|
||||
# Version 62: Compute RPC v6.0:
|
||||
# Add support for VDPA port attach/detach
|
||||
{'compute_rpc': '6.0'},
|
||||
# Version 63: Compute RPC v6.0:
|
||||
# Add support for VDPA hotplug live migration and suspend/resume
|
||||
{'compute_rpc': '6.0'},
|
||||
)
|
||||
|
||||
# This is used to raise an error at service startup if older than N-1 computes
|
||||
|
@ -561,8 +561,8 @@ class InstanceHelperMixin:
|
||||
self.api.post_server_action(
|
||||
server['id'],
|
||||
{'os-migrateLive': {'host': None, 'block_migration': 'auto'}})
|
||||
self._wait_for_state_change(server, server_expected_state)
|
||||
self._wait_for_migration_status(server, [migration_expected_state])
|
||||
return self._wait_for_state_change(server, server_expected_state)
|
||||
|
||||
_live_migrate_server = _live_migrate
|
||||
|
||||
|
@ -1062,7 +1062,7 @@ class SRIOVAttachDetachTest(_PCIServersTestBase):
|
||||
self.neutron.sriov_pf_port2['id'])
|
||||
|
||||
|
||||
class VDPAServersTest(_PCIServersTestBase):
|
||||
class VDPAServersTest(_PCIServersWithMigrationTestBase):
|
||||
|
||||
# this is needed for os_compute_api:os-migrate-server:migrate policy
|
||||
ADMIN_API = True
|
||||
@ -1094,7 +1094,6 @@ class VDPAServersTest(_PCIServersTestBase):
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
|
||||
# The ultimate base class _IntegratedTestBase uses NeutronFixture but
|
||||
# we need a bit more intelligent neutron for these tests. Applying the
|
||||
# new fixture here means that we re-stub what the previous neutron
|
||||
@ -1180,7 +1179,6 @@ class VDPAServersTest(_PCIServersTestBase):
|
||||
expected = """
|
||||
<interface type="vdpa">
|
||||
<mac address="b5:bc:2e:e7:51:ee"/>
|
||||
<model type="virtio"/>
|
||||
<source dev="/dev/vhost-vdpa-3"/>
|
||||
</interface>"""
|
||||
actual = etree.tostring(elem, encoding='unicode')
|
||||
@ -1568,8 +1566,62 @@ class VDPAServersTest(_PCIServersTestBase):
|
||||
self.assertEqual(
|
||||
dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname'])
|
||||
|
||||
def test_suspend(self):
|
||||
self._test_common(self._suspend_server)
|
||||
def test_suspend_and_resume_service_version_62(self):
|
||||
with mock.patch(
|
||||
"nova.objects.service.get_minimum_version_all_cells",
|
||||
return_value=62
|
||||
):
|
||||
self._test_common(self._suspend_server)
|
||||
|
||||
def test_suspend_and_resume(self):
|
||||
source = self.start_vdpa_compute(hostname='source')
|
||||
vdpa_port, server = self._create_port_and_server()
|
||||
num_pci = self.NUM_PFS + self.NUM_VFS
|
||||
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2)
|
||||
server = self._suspend_server(server)
|
||||
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2)
|
||||
self.assertEqual('SUSPENDED', server['status'])
|
||||
server = self._resume_server(server)
|
||||
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2)
|
||||
self.assertEqual('ACTIVE', server['status'])
|
||||
|
||||
def test_live_migrate_service_version_62(self):
|
||||
with mock.patch(
|
||||
"nova.objects.service.get_minimum_version_all_cells",
|
||||
return_value=62
|
||||
):
|
||||
self._test_common(self._live_migrate)
|
||||
|
||||
def test_live_migrate(self):
|
||||
source = self.start_vdpa_compute(hostname='source')
|
||||
dest = self.start_vdpa_compute(hostname='dest')
|
||||
|
||||
num_pci = self.NUM_PFS + self.NUM_VFS
|
||||
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci)
|
||||
self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci)
|
||||
|
||||
# ensure we boot the vm on the "source" compute
|
||||
self.api.put_service(
|
||||
self.computes['dest'].service_ref.uuid, {'status': 'disabled'})
|
||||
vdpa_port, server = self._create_port_and_server()
|
||||
self.assertEqual(
|
||||
source, server['OS-EXT-SRV-ATTR:hypervisor_hostname'])
|
||||
|
||||
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2)
|
||||
# enable the dest we do not need to disable the source since cold
|
||||
# migrate wont happen to the same host in the libvirt driver
|
||||
self.api.put_service(
|
||||
self.computes['dest'].service_ref.uuid, {'status': 'enabled'})
|
||||
|
||||
with mock.patch(
|
||||
'nova.virt.libvirt.LibvirtDriver.'
|
||||
'_detach_direct_passthrough_vifs'
|
||||
):
|
||||
server = self._live_migrate(server)
|
||||
self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci)
|
||||
self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2)
|
||||
self.assertEqual(
|
||||
dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname'])
|
||||
|
||||
|
||||
class PCIServersTest(_PCIServersTestBase):
|
||||
|
@ -71,6 +71,6 @@ class TestRollbackWithHWOffloadedOVS(
|
||||
# Live migrate the instance to another host
|
||||
self._live_migrate(self.server,
|
||||
migration_expected_state='failed',
|
||||
server_expected_state='MIGRATING')
|
||||
server_expected_state='ACTIVE')
|
||||
mlpr.assert_not_called()
|
||||
mlpp.assert_called_once()
|
||||
|
@ -0,0 +1,25 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
vDPA support was first introduced in the 23.0.0 (Wallaby)
|
||||
release with limited instance lifecycle operations. Nova now supports
|
||||
all instance lifecycle operations including suspend, attach/detach
|
||||
and hot-plug live migration.
|
||||
|
||||
QEMU and the Linux kernel do not currently support transparent
|
||||
live migration of vDPA devices. Hot-plug live migration
|
||||
unplugs the VDPA device on the source host before the VM is live migrated
|
||||
and automatically hot-plugs the device on the destination after the
|
||||
migration. While this can lead to packet loss, it enables live migration
|
||||
to be used when needed until transparent live migration can be added
|
||||
in a future release.
|
||||
|
||||
VDPA Hot-plug live migration requires all compute services to be upgraded
|
||||
to service level 63 to be enabled. Similarly, suspend/resume needs service
|
||||
level 63 and attach/detach require service level 62.
|
||||
As such it will not be available to use during a rolling upgrade but will
|
||||
become available when all hosts are upgraded to the 26.0.0 (Zed) release.
|
||||
|
||||
With the addition of these features, all instance lifecycle operations are
|
||||
now valid for VMs with VDPA neutron ports.
|
||||
|
Loading…
Reference in New Issue
Block a user