Support live migration with vpmem

1. Check if the cluster supports live migration with vpmem.
2. On the source host, generate the new destination XML with the vpmem
   info stored in migration_context.new_resources.
3. If there are vpmems, clean them up on the source/destination host when
   live migration succeeds/fails.

Change-Id: I5c346e690148678a2f0dc63f4f516a944c3db8cd
Implements: blueprint support-live-migration-with-virtual-persistent-memory
LuyaoZhong 2020-03-27 08:07:17 +00:00
parent 990a26ef1f
commit 4bd5af66b5
10 changed files with 268 additions and 38 deletions


@@ -7664,7 +7664,7 @@ class ComputeManager(manager.Manager):
LOG.debug('destination check data is %s', dest_check_data)
try:
allocs = self.reportclient.get_allocations_for_consumer(
ctxt, instance.uuid)
ctxt, instance.uuid)
migrate_data = self.compute_rpcapi.check_can_live_migrate_source(
ctxt, instance, dest_check_data)
if ('src_supports_numa_live_migration' in migrate_data and
@@ -8249,9 +8249,10 @@ class ComputeManager(manager.Manager):
self.driver.live_migration_abort(instance)
self._notify_live_migrate_abort_end(context, instance)
def _live_migration_cleanup_flags(self, migrate_data):
"""Determine whether disks or instance path need to be cleaned up after
live migration (at source on success, at destination on rollback)
def _live_migration_cleanup_flags(self, migrate_data, migr_ctxt=None):
"""Determine whether disks, instance path or other resources
need to be cleaned up after live migration (at source on success,
at destination on rollback)
Block migration needs empty image at destination host before migration
starts, so if any failure occurs, any empty images have to be deleted.
@@ -8260,7 +8261,11 @@ class ComputeManager(manager.Manager):
newly created instance-xxx dir on the destination as a part of its
rollback process
There may be other resources which need cleanup; currently this is
limited to vPMEM devices with the libvirt driver.
:param migrate_data: implementation specific data
:param migr_ctxt: migration context which may carry specific resources
(e.g. vPMEM devices) that need cleanup
:returns: (bool, bool) -- do_cleanup, destroy_disks
"""
# NOTE(pkoniszewski): block migration specific params are set inside
@@ -8270,11 +8275,20 @@ class ComputeManager(manager.Manager):
do_cleanup = False
destroy_disks = False
if isinstance(migrate_data, migrate_data_obj.LibvirtLiveMigrateData):
has_vpmem = False
if migr_ctxt and migr_ctxt.old_resources:
for resource in migr_ctxt.old_resources:
if ('metadata' in resource and
isinstance(resource.metadata,
objects.LibvirtVPMEMDevice)):
has_vpmem = True
break
# No instance booting at source host, but instance dir
# must be deleted for preparing next block migration
# must be deleted for preparing next live migration w/o shared
# storage
do_cleanup = not migrate_data.is_shared_instance_path
# vpmem must be cleaned up
do_cleanup = not migrate_data.is_shared_instance_path or has_vpmem
destroy_disks = not migrate_data.is_shared_block_storage
elif isinstance(migrate_data, migrate_data_obj.XenapiLiveMigrateData):
do_cleanup = migrate_data.block_migration
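
The effect of the new has_vpmem flag: a vPMEM device found in
migration_context.old_resources forces do_cleanup even when the instance
path is shared, because vPMEM namespaces live on the source host rather
than on shared storage. Below is a minimal standalone sketch of that
decision, using hypothetical stand-in classes (FakeVPMEMDevice,
FakeResource, FakeMigrationContext) rather than Nova's real objects.

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class FakeVPMEMDevice:          # stand-in for objects.LibvirtVPMEMDevice
    label: str
    devpath: str

@dataclass
class FakeResource:             # stand-in for objects.Resource
    resource_class: str
    metadata: Optional[FakeVPMEMDevice] = None

@dataclass
class FakeMigrationContext:     # stand-in for objects.MigrationContext
    old_resources: List[FakeResource] = field(default_factory=list)

def cleanup_flags(is_shared_instance_path, is_shared_block_storage,
                  migr_ctxt=None):
    """Sketch of the libvirt branch: vpmem presence forces do_cleanup."""
    has_vpmem = any(
        isinstance(res.metadata, FakeVPMEMDevice)
        for res in (migr_ctxt.old_resources if migr_ctxt else []))
    do_cleanup = not is_shared_instance_path or has_vpmem
    destroy_disks = not is_shared_block_storage
    return do_cleanup, destroy_disks

# With a shared instance path alone, cleanup would be skipped; a vpmem
# resource still requires cleanup on the source host.
ctxt = FakeMigrationContext(old_resources=[
    FakeResource('CUSTOM_PMEM_NAMESPACE_4GB',
                 FakeVPMEMDevice('4GB', '/dev/dax0.0'))])
assert cleanup_flags(True, False, ctxt) == (True, True)
assert cleanup_flags(True, False, None) == (False, True)
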
@@ -8427,7 +8441,7 @@ class ComputeManager(manager.Manager):
source_node = instance.node
do_cleanup, destroy_disks = self._live_migration_cleanup_flags(
migrate_data)
migrate_data, migr_ctxt=instance.migration_context)
if do_cleanup:
LOG.debug('Calling driver.cleanup from _post_live_migration',
@@ -8727,7 +8741,7 @@ class ComputeManager(manager.Manager):
bdms=bdms)
do_cleanup, destroy_disks = self._live_migration_cleanup_flags(
migrate_data)
migrate_data, migr_ctxt=instance.migration_context)
if do_cleanup:
self.compute_rpcapi.rollback_live_migration_at_destination(
@@ -8867,6 +8881,9 @@ class ComputeManager(manager.Manager):
# check_can_live_migrate_destination()
self.rt.free_pci_device_claims_for_instance(context, instance)
# NOTE(luyao): Apply the migration_context temporarily. Since this is
# the destination host, we rely on the instance object to clean up
# destination-specific resources like vpmem.
with instance.mutated_migration_context():
self.driver.rollback_live_migration_at_destination(
context, instance, network_info, block_device_info,
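
Instance.mutated_migration_context() is an existing Nova helper that
temporarily applies the migration context to the instance. The sketch
below only illustrates the "apply temporarily, then restore" idea with
made-up stand-in classes; it is not the real implementation, which swaps
several instance fields (including resources) taken from the migration
context.

import contextlib

class FakeInstance:
    """Hypothetical stand-in showing the temporary-apply pattern."""

    def __init__(self, resources, migration_context=None):
        self.resources = resources
        self.migration_context = migration_context

    @contextlib.contextmanager
    def mutated_migration_context(self):
        # Temporarily expose the destination-side (new) resources so that
        # rollback/cleanup code sees the vpmems claimed on this host, then
        # restore the original view afterwards.
        saved = self.resources
        if self.migration_context is not None:
            self.resources = self.migration_context.get('new_resources',
                                                         saved)
        try:
            yield
        finally:
            self.resources = saved

inst = FakeInstance(resources=['src_vpmem'],
                    migration_context={'new_resources': ['dst_vpmem']})
with inst.mutated_migration_context():
    assert inst.resources == ['dst_vpmem']   # cleanup sees dest resources
assert inst.resources == ['src_vpmem']       # original view restored
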


@@ -46,6 +46,17 @@ def supports_vif_related_pci_allocations(context, host):
return svc.version >= 36
def supports_vpmem_live_migration(context):
"""Checks if the commpute host service is new enough to support
instance live migration with virtual persistent memory.
:param context: The user request context.
:returns: True if the compute hosts are new enough to support live
migration with vpmem
"""
return objects.Service.get_minimum_version(context, 'nova-compute') >= 51
class LiveMigrationTask(base.TaskBase):
def __init__(self, context, instance, destination,
block_migration, disk_over_commit, migration, compute_rpcapi,
@@ -261,11 +272,16 @@ class LiveMigrationTask(base.TaskBase):
if not self.instance.resources:
return
has_vpmem = False
for resource in self.instance.resources:
if resource.resource_class.startswith("CUSTOM_PMEM_NAMESPACE_"):
raise exception.MigrationPreCheckError(
reason="Cannot live migration with virtual persistent "
"memory, the operation is not supported.")
has_vpmem = True
break
if has_vpmem and not supports_vpmem_live_migration(self.context):
raise exception.MigrationPreCheckError(
reason="Cannot live migrate with virtual persistent memory, "
"the operation is not supported.")
def _check_host_is_up(self, host):
service = objects.Service.get_by_compute_host(self.context, host)
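
The conductor-side precheck above only allows a vPMEM instance to live
migrate once every nova-compute service reports at least version 51 (the
SERVICE_VERSION bump in this change). A hedged sketch of that gating,
where minimum_compute_version stands in for the value returned by
objects.Service.get_minimum_version(context, 'nova-compute'):

MIN_VPMEM_LM_VERSION = 51   # matches the SERVICE_VERSION bump in this change

class MigrationPreCheckError(Exception):
    """Stand-in for exception.MigrationPreCheckError."""

def check_can_migrate_vpmem(resource_classes, minimum_compute_version):
    """Reject live migration of vpmem instances on too-old clusters."""
    has_vpmem = any(rc.startswith('CUSTOM_PMEM_NAMESPACE_')
                    for rc in resource_classes)
    if has_vpmem and minimum_compute_version < MIN_VPMEM_LM_VERSION:
        raise MigrationPreCheckError(
            'Cannot live migrate with virtual persistent memory, '
            'the operation is not supported.')

# An upgraded cluster passes the precheck, an old one still refuses.
check_can_migrate_vpmem(['CUSTOM_PMEM_NAMESPACE_4GB'], 51)
try:
    check_can_migrate_vpmem(['CUSTOM_PMEM_NAMESPACE_4GB'], 50)
except MigrationPreCheckError:
    pass
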


@@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 50
SERVICE_VERSION = 51
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@@ -183,6 +183,8 @@ SERVICE_VERSION_HISTORY = (
# Version 50: Compute RPC v5.11:
# Add accel_uuids (accelerator requests) param to build_and_run_instance
{'compute_rpc': '5.11'},
# Version 51: Add support for live migration with vpmem
{'compute_rpc': '5.11'},
)


@@ -10137,6 +10137,25 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
instance,
migration.id)
def test_live_migration_cleanup_flags_shared_path_and_vpmem_libvirt(self):
migrate_data = objects.LibvirtLiveMigrateData(
is_shared_block_storage=False,
is_shared_instance_path=True)
migr_ctxt = objects.MigrationContext()
vpmem_resource = objects.Resource(
provider_uuid=uuids.rp_uuid,
resource_class="CUSTOM_PMEM_NAMESPACE_4GB",
identifier='ns_0', metadata=objects.LibvirtVPMEMDevice(
label='4GB',
name='ns_0', devpath='/dev/dax0.0',
size=4292870144, align=2097152))
migr_ctxt.old_resources = objects.ResourceList(
objects=[vpmem_resource])
do_cleanup, destroy_disks = self.compute._live_migration_cleanup_flags(
migrate_data, migr_ctxt)
self.assertTrue(do_cleanup)
self.assertTrue(destroy_disks)
def test_live_migration_cleanup_flags_block_migrate_libvirt(self):
migrate_data = objects.LibvirtLiveMigrateData(
is_shared_block_storage=False,


@@ -833,6 +833,15 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
_test, pci_requests, True, True)
def test_check_can_migrate_specific_resources(self):
"""Test _check_can_migrate_specific_resources allows live migration
with vpmem.
"""
@mock.patch.object(live_migrate, 'supports_vpmem_live_migration')
def _test(resources, supp_lm_vpmem_retval, mock_support_lm_vpmem):
self.instance.resources = resources
mock_support_lm_vpmem.return_value = supp_lm_vpmem_retval
self.task._check_can_migrate_specific_resources()
vpmem_0 = objects.LibvirtVPMEMDevice(
label='4GB', name='ns_0', devpath='/dev/dax0.0',
size=4292870144, align=2097152)
@@ -840,7 +849,11 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
provider_uuid=uuids.rp,
resource_class="CUSTOM_PMEM_NAMESPACE_4GB",
identifier='ns_0', metadata=vpmem_0)
self.instance.resources = objects.ResourceList(
resources = objects.ResourceList(
objects=[resource_0])
_test(None, False)
_test(None, True)
_test(resources, True)
self.assertRaises(exception.MigrationPreCheckError,
self.task._check_can_migrate_specific_resources)
_test, resources, False)


@@ -11218,7 +11218,8 @@ class LibvirtConnTestCase(test.NoDBTestCase,
drvr._live_migration_uri(target_connection),
params=params, flags=0)
mock_updated_guest_xml.assert_called_once_with(
guest, migrate_data, mock.ANY, get_vif_config=None)
guest, migrate_data, mock.ANY, get_vif_config=None,
new_resources=None)
def test_live_migration_update_vifs_xml(self):
"""Tests that when migrate_data.vifs is populated, the destination
@@ -11245,7 +11246,8 @@ class LibvirtConnTestCase(test.NoDBTestCase,
fake_xml = '<domain type="qemu"/>'
def fake_get_updated_guest_xml(guest, migrate_data, get_volume_config,
get_vif_config=None):
get_vif_config=None,
new_resources=None):
self.assertIsNotNone(get_vif_config)
return fake_xml
@@ -25894,6 +25896,29 @@ class LibvirtPMEMNamespaceTests(test.NoDBTestCase):
self.assertEqual('SMALL', vpmems[1].label)
self.assertEqual('SMALL', vpmems[2].label)
@mock.patch('nova.virt.hardware.get_vpmems')
def test_sorted_migrating_vpmem_resources(self, mock_labels):
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
instance = fake_instance.fake_instance_obj(self.context)
instance.flavor = objects.Flavor(
name='m1.small', memory_mb=2048, vcpus=2, root_gb=10,
ephemeral_gb=20, swap=0, extra_specs={
'hw:pmem': 'SMALL,4GB,SMALL'})
mock_labels.return_value = ['SMALL', '4GB', 'SMALL']
migr_context = objects.MigrationContext()
# original order is '4GB' 'SMALL' 'SMALL'
migr_context.new_resources = objects.ResourceList(objects=[
self.resource_0, self.resource_1, self.resource_2])
instance.migration_context = migr_context
new_resources = drvr._sorted_migrating_resources(
instance, instance.flavor)
# ordered vpmems are 'SMALL' '4GB' 'SMALL'
expected_new_resources = objects.ResourceList(objects=[
self.resource_1, self.resource_0, self.resource_2])
for i in range(3):
self.assertEqual(expected_new_resources[i], new_resources[i])
@mock.patch('nova.privsep.libvirt.cleanup_vpmem')
def test_cleanup_vpmems(self, mock_cleanup_vpmem):
vpmems = [self.vpmem_0, self.vpmem_1, self.vpmem_2]


@@ -116,6 +116,70 @@ class UtilityMigrationTestCase(test.NoDBTestCase):
mock_memory_backing.assert_called_once_with(mock.ANY, data)
self.assertEqual(1, mock_tostring.called)
def test_update_device_resources_xml_vpmem(self):
# original xml for vpmems; /dev/dax0.1 and /dev/dax0.2 here
# are the vpmem device paths on the source host
old_xml = textwrap.dedent("""
<domain>
<devices>
<memory model='nvdimm'>
<source>
<path>/dev/dax0.1</path>
<alignsize>2048</alignsize>
<pmem>on</pmem>
</source>
<target>
<size>4192256</size>
<label>
<size>2048</size>
</label>
<node>0</node>
</target>
</memory>
<memory model='nvdimm'>
<source>
<path>/dev/dax0.2</path>
<alignsize>2048</alignsize>
<pmem>on</pmem>
</source>
<target>
<size>4192256</size>
<label>
<size>2048</size>
</label>
<node>0</node>
</target>
</memory>
</devices>
</domain>""")
doc = etree.fromstring(old_xml)
vpmem_resource_0 = objects.Resource(
provider_uuid=uuids.rp_uuid,
resource_class="CUSTOM_PMEM_NAMESPACE_4GB",
identifier='ns_0',
metadata=objects.LibvirtVPMEMDevice(
label='4GB', name='ns_0', devpath='/dev/dax1.0',
size=4292870144, align=2097152))
vpmem_resource_1 = objects.Resource(
provider_uuid=uuids.rp_uuid,
resource_class="CUSTOM_PMEM_NAMESPACE_4GB",
identifier='ns_1',
metadata=objects.LibvirtVPMEMDevice(
label='4GB', name='ns_1', devpath='/dev/dax2.0',
size=4292870144, align=2097152))
# new_resources contains the vpmems claimed on the destination;
# /dev/dax1.0 and /dev/dax2.0 are where the vpmem data is migrated to
new_resources = objects.ResourceList(
objects=[vpmem_resource_0, vpmem_resource_1])
res = etree.tostring(migration._update_device_resources_xml(
copy.deepcopy(doc), new_resources),
encoding='unicode')
# we expect the vpmem info to be updated in the xml after invoking
# _update_device_resources_xml
new_xml = old_xml.replace("/dev/dax0.1", "/dev/dax1.0")
new_xml = new_xml.replace("/dev/dax0.2", "/dev/dax2.0")
self.assertXmlEqual(res, new_xml)
def test_update_numa_xml(self):
xml = textwrap.dedent("""
<domain>


@@ -5858,30 +5858,18 @@ class LibvirtDriver(driver.ComputeDriver):
return guest
def _get_ordered_vpmems(self, instance, flavor):
ordered_vpmems = []
vpmems = self._get_vpmems(instance)
labels = hardware.get_vpmems(flavor)
for label in labels:
for vpmem in vpmems:
if vpmem.label == label:
ordered_vpmems.append(vpmem)
vpmems.remove(vpmem)
break
resources = self._get_resources(instance)
ordered_vpmem_resources = self._get_ordered_vpmem_resources(
resources, flavor)
ordered_vpmems = [self._vpmems_by_name[resource.identifier]
for resource in ordered_vpmem_resources]
return ordered_vpmems
def _get_vpmems(self, instance, prefix=None):
vpmems = []
resources = instance.resources
if prefix == 'old' and instance.migration_context:
if 'old_resources' in instance.migration_context:
resources = instance.migration_context.old_resources
if not resources:
return vpmems
for resource in resources:
rc = resource.resource_class
if rc.startswith("CUSTOM_PMEM_NAMESPACE_"):
vpmem = self._vpmems_by_name[resource.identifier]
vpmems.append(vpmem)
resources = self._get_resources(instance, prefix=prefix)
vpmem_resources = self._get_vpmem_resources(resources)
vpmems = [self._vpmems_by_name[resource.identifier]
for resource in vpmem_resources]
return vpmems
def _guest_add_vpmems(self, guest, vpmems):
@@ -8143,6 +8131,53 @@ class LibvirtDriver(driver.ComputeDriver):
claim.image_meta)
return migrate_data
def _get_resources(self, instance, prefix=None):
resources = []
if prefix:
migr_context = instance.migration_context
attr_name = prefix + 'resources'
if migr_context and attr_name in migr_context:
resources = getattr(migr_context, attr_name) or []
else:
resources = instance.resources or []
return resources
def _get_vpmem_resources(self, resources):
vpmem_resources = []
for resource in resources:
if 'metadata' in resource and \
isinstance(resource.metadata, objects.LibvirtVPMEMDevice):
vpmem_resources.append(resource)
return vpmem_resources
def _get_ordered_vpmem_resources(self, resources, flavor):
vpmem_resources = self._get_vpmem_resources(resources)
ordered_vpmem_resources = []
labels = hardware.get_vpmems(flavor)
for label in labels:
for vpmem_resource in vpmem_resources:
if vpmem_resource.metadata.label == label:
ordered_vpmem_resources.append(vpmem_resource)
vpmem_resources.remove(vpmem_resource)
break
return ordered_vpmem_resources
def _sorted_migrating_resources(self, instance, flavor):
"""This method is used to sort instance.migration_context.new_resources
claimed on dest host, then the ordered new resources will be used to
update resources info (e.g. vpmems) in the new xml which is used for
live migration.
"""
resources = self._get_resources(instance, prefix='new_')
if not resources:
return
ordered_resources = []
ordered_vpmem_resources = self._get_ordered_vpmem_resources(
resources, flavor)
ordered_resources.extend(ordered_vpmem_resources)
ordered_resources_obj = objects.ResourceList(objects=ordered_resources)
return ordered_resources_obj
def _get_live_migrate_numa_info(self, instance_numa_topology, flavor,
image_meta):
"""Builds a LibvirtLiveMigrateNUMAInfo object to send to the source of
@@ -8614,12 +8649,16 @@ class LibvirtDriver(driver.ComputeDriver):
host=self._host)
self._detach_direct_passthrough_vifs(context,
migrate_data, instance)
new_resources = None
if isinstance(instance, objects.Instance):
new_resources = self._sorted_migrating_resources(
instance, instance.flavor)
new_xml_str = libvirt_migrate.get_updated_guest_xml(
# TODO(sahid): It's not a really good idea to pass
# the method _get_volume_config and we should find
# a way to avoid this in future.
guest, migrate_data, self._get_volume_config,
get_vif_config=get_vif_config)
get_vif_config=get_vif_config, new_resources=new_resources)
# NOTE(pkoniszewski): Because of precheck which blocks
# tunnelled block live migration with mapped volumes we
@@ -8803,6 +8842,8 @@ class LibvirtDriver(driver.ComputeDriver):
n = 0
start = time.time()
is_post_copy_enabled = self._is_post_copy_enabled(migration_flags)
# vpmem does not support post copy
is_post_copy_enabled &= not bool(self._get_vpmems(instance))
while True:
info = guest.get_job_info()
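
_sorted_migrating_resources reorders the vpmem resources claimed on the
destination so that they follow the label order of the flavor's hw:pmem
extra spec, which is also the order the nvdimm devices appear in the
guest XML. A self-contained sketch of that greedy label matching, using
plain dicts as hypothetical stand-ins for the Resource objects:

def order_vpmems_by_flavor_labels(vpmem_resources, labels):
    """For each flavor label, pick the first unused resource with that
    label (mirrors the matching loop in _get_ordered_vpmem_resources)."""
    remaining = list(vpmem_resources)
    ordered = []
    for label in labels:
        for res in remaining:
            if res['label'] == label:
                ordered.append(res)
                remaining.remove(res)
                break
    return ordered

# hw:pmem='SMALL,4GB,SMALL' gives the labels in flavor order, while the
# destination may have claimed the namespaces in a different order.
labels = ['SMALL', '4GB', 'SMALL']
claimed = [{'label': '4GB', 'name': 'ns_0'},
           {'label': 'SMALL', 'name': 'ns_1'},
           {'label': 'SMALL', 'name': 'ns_2'}]
ordered = order_vpmems_by_flavor_labels(claimed, labels)
assert [r['name'] for r in ordered] == ['ns_1', 'ns_0', 'ns_2']
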


@@ -25,6 +25,7 @@ from oslo_log import log as logging
from nova.compute import power_state
import nova.conf
from nova import exception
from nova import objects
from nova.virt import hardware
from nova.virt.libvirt import config as vconfig
@@ -80,7 +81,7 @@ def serial_listen_ports(migrate_data):
def get_updated_guest_xml(guest, migrate_data, get_volume_config,
get_vif_config=None):
get_vif_config=None, new_resources=None):
xml_doc = etree.fromstring(guest.get_xml_desc(dump_migratable=True))
xml_doc = _update_graphics_xml(xml_doc, migrate_data)
xml_doc = _update_serial_xml(xml_doc, migrate_data)
@@ -91,9 +92,33 @@ def get_updated_guest_xml(guest, migrate_data, get_volume_config,
xml_doc = _update_vif_xml(xml_doc, migrate_data, get_vif_config)
if 'dst_numa_info' in migrate_data:
xml_doc = _update_numa_xml(xml_doc, migrate_data)
if new_resources:
xml_doc = _update_device_resources_xml(xml_doc, new_resources)
return etree.tostring(xml_doc, encoding='unicode')
def _update_device_resources_xml(xml_doc, new_resources):
vpmems = []
for resource in new_resources:
if 'metadata' in resource:
res_meta = resource.metadata
if isinstance(res_meta, objects.LibvirtVPMEMDevice):
vpmems.append(res_meta)
# If other resource types need handling in the future, the xml should
# be updated here in the same way as for vpmems
xml_doc = _update_vpmems_xml(xml_doc, vpmems)
return xml_doc
def _update_vpmems_xml(xml_doc, vpmems):
memory_devices = xml_doc.findall("./devices/memory")
for pos, memory_dev in enumerate(memory_devices):
if memory_dev.get('model') == 'nvdimm':
devpath = memory_dev.find('./source/path')
devpath.text = vpmems[pos].devpath
return xml_doc
def _update_numa_xml(xml_doc, migrate_data):
LOG.debug('_update_numa_xml input xml=%s',
etree.tostring(xml_doc, encoding='unicode', pretty_print=True))
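
_update_vpmems_xml rewrites the <source><path> of each nvdimm memory
device so the guest XML points at the device paths claimed on the
destination host. Below is a trimmed-down, runnable sketch of the same
idea, assuming lxml is available (the module already uses lxml's etree);
it is not the exact helper above, which indexes the devices by position.

from lxml import etree

def update_nvdimm_paths(xml_str, new_devpaths):
    """Rewrite nvdimm <source><path> entries in document order with the
    device paths claimed on the destination host."""
    doc = etree.fromstring(xml_str)
    nvdimms = [m for m in doc.findall('./devices/memory')
               if m.get('model') == 'nvdimm']
    for mem, devpath in zip(nvdimms, new_devpaths):
        mem.find('./source/path').text = devpath
    return etree.tostring(doc, encoding='unicode')

xml = """<domain><devices>
  <memory model='nvdimm'><source><path>/dev/dax0.1</path></source></memory>
  <memory model='nvdimm'><source><path>/dev/dax0.2</path></source></memory>
</devices></domain>"""
res = update_nvdimm_paths(xml, ['/dev/dax1.0', '/dev/dax2.0'])
assert '/dev/dax1.0' in res and '/dev/dax0.1' not in res
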


@@ -0,0 +1,8 @@
---
features:
- |
The libvirt driver now supports live migration with virtual persistent
memory (vPMEM), which requires QEMU as the hypervisor. At the
virtualization layer, QEMU copies vPMEM over the network in the same way
as volatile memory; because vPMEM capacity is typically large, live
migration may take considerably longer.