From d966ffabc373b4549ab47d3d3487f398073ce2bf Mon Sep 17 00:00:00 2001 From: Sean Mooney Date: Wed, 9 Jan 2019 17:19:39 +0000 Subject: [PATCH] libvirt: auto detach/attach sriov ports on migration - This patch detaches all direct mode sriov interfaces before calculating the updated xml for the destination immediately before starting the migration. - This change modifies post_live_migration_at_destination to check if an instance has all interfaces defined in the guest xml and attaches the missing sriov interfaces if they are not present. - This change adds a release note for the sriov live migration feature. - This change extends the base virt driver interface with a new method rollback_live_migration_at_source and invokes it from rollback_live_migration in the compute manager. Change-Id: Ib61913d9d6ef6148170963463bb71c13f4272c5d Implements: blueprint libvirt-neutron-sriov-livemigration --- nova/compute/manager.py | 3 +- nova/tests/unit/virt/libvirt/test_driver.py | 88 +++++++++++++++++-- nova/virt/driver.py | 10 +++ nova/virt/libvirt/driver.py | 62 +++++++++++-- ...sriov-live-migration-0311dfb7102a48db.yaml | 25 ++++++ 5 files changed, 176 insertions(+), 12 deletions(-) create mode 100644 releasenotes/notes/sriov-live-migration-0311dfb7102a48db.yaml diff --git a/nova/compute/manager.py b/nova/compute/manager.py index de8ddb03d95e..e7bb7111987d 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -7081,7 +7081,8 @@ class ComputeManager(manager.Manager): # for nova-network) # NOTE(mriedem): This is a no-op for neutron. 
self.network_api.setup_networks_on_host(context, instance, self.host) - + self.driver.rollback_live_migration_at_source(context, instance, + migrate_data) bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) for bdm in bdms: diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index f647915c5fac..a32305f062e8 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -9934,12 +9934,20 @@ class LibvirtConnTestCase(test.NoDBTestCase, guest xml is updated with the migrate_data.vifs configuration. """ instance = objects.Instance(**self.test_instance) + + source_vif_normal = network_model.VIF( + id=uuids.port_id, type=network_model.VIF_TYPE_OVS, + vnic_type=network_model.VNIC_TYPE_NORMAL, details={'foo': 'bar'}, + profile={'binding:host_id': 'fake-source-host'}) + + vif = objects.VIFMigrateData(port_id=uuids.port_id, + source_vif=source_vif_normal) migrate_data = objects.LibvirtLiveMigrateData( serial_listen_addr='', target_connect_addr=None, bdms=[], block_migration=False, - vifs=[objects.VIFMigrateData(port_id=uuids.port_id)]) + vifs=[vif]) drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) guest = libvirt_guest.Guest(mock.MagicMock()) @@ -9950,18 +9958,67 @@ class LibvirtConnTestCase(test.NoDBTestCase, self.assertIsNotNone(get_vif_config) return fake_xml + @mock.patch.object(drvr, "detach_interface") @mock.patch('nova.virt.libvirt.migration.get_updated_guest_xml', side_effect=fake_get_updated_guest_xml) @mock.patch.object(drvr._host, 'has_min_version', return_value=True) @mock.patch.object(guest, 'migrate') - def _test(migrate, has_min_version, get_updated_guest_xml): + def _test_normal(migrate, has_min_version, + get_updated_guest_xml, detach): drvr._live_migration_operation( self.context, instance, 'dest.host', False, migrate_data, guest, []) - self.assertEqual(1, get_updated_guest_xml.call_count) + 
get_updated_guest_xml.assert_called_once() migrate.assert_called() + detach.assert_not_called() - _test() + _test_normal() + + source_vif_direct = network_model.VIF( + id=uuids.port_id, type=network_model.VIF_TYPE_OVS, + vnic_type=network_model.VNIC_TYPE_DIRECT, details={'foo': 'bar'}, + profile={'binding:host_id': 'fake-source-host'}) + + vif_direct = objects.VIFMigrateData(port_id=uuids.port_id, + source_vif=source_vif_direct) + migrate_data = objects.LibvirtLiveMigrateData( + serial_listen_addr='', target_connect_addr=None, + bdms=[], block_migration=False, vifs=[vif_direct]) + + @mock.patch.object(drvr, "detach_interface") + @mock.patch('nova.virt.libvirt.migration.get_updated_guest_xml', + side_effect=fake_get_updated_guest_xml) + @mock.patch.object(drvr._host, 'has_min_version', return_value=True) + @mock.patch.object(guest, 'migrate') + def _test_direct(migrate, has_min_version, + get_updated_guest_xml, detach): + drvr._live_migration_operation( + self.context, instance, 'dest.host', False, + migrate_data, guest, []) + get_updated_guest_xml.assert_called_once() + migrate.assert_called() + detach.asset_called() + + _test_direct() + + migrate_data = objects.LibvirtLiveMigrateData( + serial_listen_addr='', target_connect_addr=None, + bdms=[], block_migration=False, vifs=[vif, vif_direct]) + + @mock.patch.object(drvr, "detach_interface") + @mock.patch('nova.virt.libvirt.migration.get_updated_guest_xml', + side_effect=fake_get_updated_guest_xml) + @mock.patch.object(drvr._host, 'has_min_version', return_value=True) + @mock.patch.object(guest, 'migrate') + def _test_mix(migrate, has_min_version, get_updated_guest_xml, detach): + drvr._live_migration_operation( + self.context, instance, 'dest.host', False, + migrate_data, guest, []) + get_updated_guest_xml.assert_called_once() + migrate.assert_called() + detach.asset_called_once() + + _test_mix() @mock.patch.object(host.Host, 'has_min_version', return_value=True) @mock.patch.object(fakelibvirt.virDomain, 
"migrateToURI3") @@ -16852,10 +16909,15 @@ class LibvirtConnTestCase(test.NoDBTestCase, for fs in supported_fs: self.assertFalse(drvr.is_supported_fs_format(fs)) + @mock.patch("nova.objects.instance.Instance.image_meta", + new_callable=mock.PropertyMock()) + @mock.patch("nova.virt.libvirt.driver.LibvirtDriver.attach_interface") + @mock.patch('nova.virt.libvirt.guest.Guest.get_interfaces') @mock.patch('nova.virt.libvirt.host.Host.write_instance_config') @mock.patch('nova.virt.libvirt.host.Host.get_guest') def test_post_live_migration_at_destination( - self, mock_get_guest, mock_write_instance_config): + self, mock_get_guest, mock_write_instance_config, + mock_get_interfaces, mock_attach, mock_image_meta): instance = objects.Instance(id=1, uuid=uuids.instance) dom = mock.MagicMock() guest = libvirt_guest.Guest(dom) @@ -16863,10 +16925,24 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock_get_guest.return_value = guest drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) - drvr.post_live_migration_at_destination(mock.ANY, instance, mock.ANY) + net_info = network_model.NetworkInfo() + mock_get_interfaces.return_type = [] + drvr.post_live_migration_at_destination(mock.ANY, instance, net_info) # Assert that we don't try to write anything to the destination node # since the source live migrated with the VIR_MIGRATE_PERSIST_DEST flag mock_write_instance_config.assert_not_called() + mock_attach.assert_not_called() + + vif = network_model.VIF(id=uuids.port_id, + vnic_type=network_model.VNIC_TYPE_NORMAL) + vif_direct = network_model.VIF(id=uuids.port_id, + vnic_type=network_model.VNIC_TYPE_DIRECT) + + net_info = network_model.NetworkInfo([vif, vif_direct]) + mock_get_interfaces.return_type = [vif] + drvr.post_live_migration_at_destination(mock.ANY, instance, net_info) + mock_attach.assert_called_once_with(mock.ANY, instance, + mock_image_meta, vif_direct) def test_create_propagates_exceptions(self): self.flags(virt_type='lxc', group='libvirt') diff --git 
a/nova/virt/driver.py b/nova/virt/driver.py index 3ffeeb742df5..8ed6d18122e4 100644 --- a/nova/virt/driver.py +++ b/nova/virt/driver.py @@ -1109,6 +1109,16 @@ class ComputeDriver(object): """ raise NotImplementedError() + def rollback_live_migration_at_source(self, context, instance, + migrate_data): + """Clean up source node after a failed live migration. + + :param context: security context + :param instance: instance object that was being migrated + :param migrate_data: a LiveMigrateData object + """ + pass + def rollback_live_migration_at_destination(self, context, instance, network_info, block_device_info, diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index c9939c8d3084..5541d449707f 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -7609,6 +7609,26 @@ class LibvirtDriver(driver.ComputeDriver): ' must disable serial console.') raise exception.MigrationError(reason=msg) + def _detach_direct_passthrough_vifs(self, context, + migrate_data, instance): + """detaches passthrough vif to enable live migration + + :param context: security context + :param migrate_data: a LibvirtLiveMigrateData object + :param instance: instance object that is migrated. + """ + # NOTE(sean-k-mooney): if we have vif data available we + # loop over each vif and detach all direct passthrough + # vifs to allow sriov live migration. 
+ direct_vnics = network_model.VNIC_TYPES_DIRECT_PASSTHROUGH + vifs = [vif.source_vif for vif in migrate_data.vifs + if "source_vif" in vif and vif.source_vif] + for vif in vifs: + if vif['vnic_type'] in direct_vnics: + LOG.info("Detaching vif %s from instnace " + "%s for live migration", vif['id'], instance.id) + self.detach_interface(context, instance, vif) + def _live_migration_operation(self, context, instance, dest, block_migration, migrate_data, guest, device_names): @@ -7683,6 +7703,8 @@ class LibvirtDriver(driver.ComputeDriver): inst_type=instance.flavor, virt_type=CONF.libvirt.virt_type, host=self._host) + self._detach_direct_passthrough_vifs(context, + migrate_data, instance) new_xml_str = libvirt_migrate.get_updated_guest_xml( # TODO(sahid): It's not a really good idea to pass # the method _get_volume_config and we should to find @@ -8131,6 +8153,40 @@ class LibvirtDriver(driver.ComputeDriver): instance.ramdisk_id, instance, fallback_from_host) + def _reattach_instance_vifs(self, context, instance, network_info): + guest = self._host.get_guest(instance) + # validate that the guest has the expected number of interfaces + # attached. + guest_interfaces = guest.get_interfaces() + # NOTE(sean-k-mooney): In general len(guest_interfaces) will + # be equal to len(network_info) as interfaces will not be hot unplugged + # unless they are SR-IOV direct mode interfaces. As such we do not + # need an else block here as it would be a noop. + if len(guest_interfaces) < len(network_info): + # NOTE(sean-k-mooney): we are doing a post live migration + # for a guest with sriov vif that were detached as part of + # the migration. loop over the vifs and attach the missing + # vif as part of the post live migration phase. 
+ direct_vnics = network_model.VNIC_TYPES_DIRECT_PASSTHROUGH + for vif in network_info: + if vif['vnic_type'] in direct_vnics: + LOG.info("Attaching vif %s to instance %s", + vif['id'], instance.id) + self.attach_interface(context, instance, + instance.image_meta, vif) + + def rollback_live_migration_at_source(self, context, instance, + migrate_data): + """reconnect sriov interfaces after failed live migration + :param context: security context + :param instance: the instance being migrated + :param migrate_date: a LibvirtLiveMigrateData object + """ + network_info = network_model.NetworkInfo( + [vif.source_vif for vif in migrate_data.vifs + if "source_vif" in vif and vif.source_vif]) + self._reattach_instance_vifs(context, instance, network_info) + def rollback_live_migration_at_destination(self, context, instance, network_info, block_device_info, @@ -8468,11 +8524,7 @@ class LibvirtDriver(driver.ComputeDriver): :param network_info: instance network information :param block_migration: if true, post operation of block_migration. """ - # The source node set the VIR_MIGRATE_PERSIST_DEST flag when live - # migrating so the guest xml should already be persisted on the - # destination host, so just perform a sanity check to make sure it - # made it as expected. - self._host.get_guest(instance) + self._reattach_instance_vifs(context, instance, network_info) def _get_instance_disk_info_from_config(self, guest_config, block_device_info): diff --git a/releasenotes/notes/sriov-live-migration-0311dfb7102a48db.yaml b/releasenotes/notes/sriov-live-migration-0311dfb7102a48db.yaml new file mode 100644 index 000000000000..814aebf7411c --- /dev/null +++ b/releasenotes/notes/sriov-live-migration-0311dfb7102a48db.yaml @@ -0,0 +1,25 @@ +--- +features: + - | + In this release SR-IOV live migration support is added to the libvirt + virt driver for Neutron interfaces. Neutron SR-IOV interfaces can be + grouped into two categories, direct mode interfaces and indirect. 
+ Direct mode SR-IOV interfaces are directly attached to the guest and + exposed to the guest OS. Indirect mode SR-IOV interfaces have a software + interface such as a macvtap between the guest and the SR-IOV device. + This feature enables transparent live migration for instances with + indirect mode SR-IOV devices. As there is no generic way to copy + hardware state during a live migration, direct mode migration is not + transparent to the guest. For direct mode interfaces, we mimic the + workflow already in place for suspend and resume. For instances with + SR-IOV devices, we detach the direct mode interfaces before migration + and re-attach them after the migration. As a result, instances + with direct mode SR-IOV ports will lose network connectivity during a + migration unless a bond with a live migratable interface is created + within the guest. +upgrade: + - | + The Libvirt SR-IOV migration feature introduced in this release requires + both the source and destination node to support the feature. As a result + it will be automatically disabled until the conductor and compute nodes + have been upgraded.