diff --git a/nova/compute/manager.py b/nova/compute/manager.py index a0d38c63192a..de8ddb03d95e 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -28,6 +28,7 @@ terminating it. import base64 import binascii import contextlib +import copy import functools import inspect import sys @@ -85,6 +86,7 @@ from nova.objects import base as obj_base from nova.objects import fields from nova.objects import instance as obj_instance from nova.objects import migrate_data as migrate_data_obj +from nova.pci import request as pci_req_module from nova.pci import whitelist from nova import rpc from nova import safe_utils @@ -6221,6 +6223,15 @@ class ComputeManager(manager.Manager): migrate_data = self.compute_rpcapi.\ check_can_live_migrate_source(ctxt, instance, dest_check_data) + # Create migrate_data vifs + migrate_data.vifs = \ + migrate_data_obj.LiveMigrateData.create_skeleton_migrate_vifs( + instance.get_network_info()) + # Claim PCI devices for VIFs on destination (if needed) + port_id_to_pci = self._claim_pci_for_instance_vifs(ctxt, instance) + # Update migrate VIFs with the newly claimed PCI devices + self._update_migrate_vifs_profile_with_pci(migrate_data.vifs, + port_id_to_pci) finally: self.driver.cleanup_live_migration_destination_check(ctxt, dest_check_data) @@ -6853,6 +6864,9 @@ class ComputeManager(manager.Manager): # method destroy_vifs = True + # Free instance allocations on source before claims are allocated on + # destination node + self.rt.free_pci_device_allocations_for_instance(ctxt, instance) # NOTE(danms): Save source node before calling post method on # destination, which will update it source_node = instance.node @@ -6955,7 +6969,8 @@ class ComputeManager(manager.Manager): self.network_api.setup_networks_on_host(context, instance, self.host) migration = {'source_compute': instance.host, - 'dest_compute': self.host, } + 'dest_compute': self.host, + 'migration_type': 'live-migration'} self.network_api.migrate_instance_finish(context, 
instance, migration) @@ -6970,6 +6985,8 @@ class ComputeManager(manager.Manager): phase=fields.NotificationPhase.START) block_device_info = self._get_instance_block_device_info(context, instance) + # Allocate the claimed PCI resources at destination. + self.rt.allocate_pci_devices_for_instance(context, instance) try: self.driver.post_live_migration_at_destination( @@ -7187,6 +7204,10 @@ class ComputeManager(manager.Manager): # from remote volumes if necessary block_device_info = self._get_instance_block_device_info(context, instance) + # free any instance PCI claims done on destination during + # check_can_live_migrate_destination() + self.rt.free_pci_device_claims_for_instance(context, instance) + self.driver.rollback_live_migration_at_destination( context, instance, network_info, block_device_info, destroy_disks=destroy_disks, migrate_data=migrate_data) @@ -8532,3 +8553,70 @@ class ComputeManager(manager.Manager): """ objects.ConsoleAuthToken.clean_expired_console_auths_for_host( context, self.host) + + def _claim_pci_for_instance_vifs(self, ctxt, instance): + """Claim PCI devices for the instance's VIFs on the compute node + + :param ctxt: Context + :param instance: Instance object + :return: mapping for the VIFs that yielded a + PCI claim on the compute node + """ + pci_req_id_to_port_id = {} + pci_reqs = [] + port_id_to_pci_dev = {} + + for vif in instance.get_network_info(): + pci_req = pci_req_module.get_instance_pci_request_from_vif( + ctxt, + instance, + vif) + if pci_req: + pci_req_id_to_port_id[pci_req.request_id] = vif['id'] + pci_reqs.append(pci_req) + + if pci_reqs: + # Create PCI requests and claim against PCI resource tracker + # NOTE(adrianc): We claim against the same requests as on the + # source node. 
+ vif_pci_requests = objects.InstancePCIRequests( + requests=pci_reqs, + instance_uuid=instance.uuid) + + claimed_pci_devices_objs = self.rt.claim_pci_devices( + ctxt, + vif_pci_requests) + + # Update VIFMigrateData profile with the newly claimed PCI + # device + for pci_dev in claimed_pci_devices_objs: + LOG.debug("PCI device: %s Claimed on destination node", + pci_dev.address) + port_id = pci_req_id_to_port_id[pci_dev.request_id] + port_id_to_pci_dev[port_id] = pci_dev + + return port_id_to_pci_dev + + def _update_migrate_vifs_profile_with_pci(self, + migrate_vifs, + port_id_to_pci_dev): + """Update migrate vifs profile with the claimed PCI devices + + :param migrate_vifs: list of VIFMigrateData objects + :param port_id_to_pci_dev: a mapping + :return: None. + """ + for mig_vif in migrate_vifs: + port_id = mig_vif.port_id + if port_id not in port_id_to_pci_dev: + continue + + pci_dev = port_id_to_pci_dev[port_id] + profile = copy.deepcopy(mig_vif.source_vif['profile']) + profile['pci_slot'] = pci_dev.address + profile['pci_vendor_info'] = ':'.join([pci_dev.vendor_id, + pci_dev.product_id]) + mig_vif.profile = profile + LOG.debug("Updating migrate VIF profile for port %(port_id)s:" + "%(profile)s", {'port_id': port_id, + 'profile': profile}) diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 29b6de0af5fd..6dfa4bc12620 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -1514,3 +1514,46 @@ class ResourceTracker(object): def build_succeeded(self, nodename): """Resets the failed_builds stats for the given node.""" self.stats[nodename].build_succeeded() + + @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE) + def claim_pci_devices(self, context, pci_requests): + """Claim instance PCI resources + + :param context: security context + :param pci_requests: a list of nova.objects.InstancePCIRequests + :returns: a list of nova.objects.PciDevice objects + """ + result = self.pci_tracker.claim_instance( + 
context, pci_requests, None) + self.pci_tracker.save(context) + return result + + @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE) + def allocate_pci_devices_for_instance(self, context, instance): + """Allocate instance claimed PCI resources + + :param context: security context + :param instance: instance object + """ + self.pci_tracker.allocate_instance(instance) + self.pci_tracker.save(context) + + @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE) + def free_pci_device_allocations_for_instance(self, context, instance): + """Free instance allocated PCI resources + + :param context: security context + :param instance: instance object + """ + self.pci_tracker.free_instance_allocations(context, instance) + self.pci_tracker.save(context) + + @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE) + def free_pci_device_claims_for_instance(self, context, instance): + """Free instance claimed PCI resources + + :param context: security context + :param instance: instance object + """ + self.pci_tracker.free_instance_claims(context, instance) + self.pci_tracker.save(context) diff --git a/nova/conductor/tasks/live_migrate.py b/nova/conductor/tasks/live_migrate.py index 3472bf5fdc36..95a743602877 100644 --- a/nova/conductor/tasks/live_migrate.py +++ b/nova/conductor/tasks/live_migrate.py @@ -25,6 +25,7 @@ from nova.i18n import _ from nova import network from nova import objects from nova.objects import fields as obj_fields +from nova.objects import migrate_data as migrate_data_obj from nova.scheduler import utils as scheduler_utils LOG = logging.getLogger(__name__) @@ -44,6 +45,19 @@ def supports_extended_port_binding(context, host): return svc.version >= 35 +def supports_vif_related_pci_allocations(context, host): + """Checks if the compute host service is new enough to support + VIF related PCI allocation during live migration + + :param context: The user request context. + :param host: The nova-compute host to check. 
+ :returns: True if the compute host is new enough to support vif related + PCI allocations + """ + svc = objects.Service.get_by_host_and_binary(context, host, 'nova-compute') + return svc.version >= 36 + + class LiveMigrationTask(base.TaskBase): def __init__(self, context, instance, destination, block_migration, disk_over_commit, migration, compute_rpcapi, @@ -186,6 +200,47 @@ class LiveMigrationTask(base.TaskBase): else: raise exception.MigrationPreCheckError(reason=msg) + def _check_can_migrate_pci(self, src_host, dest_host): + """Checks that an instance can migrate with PCI requests. + + At the moment support only if: + + 1. Instance contains VIF related PCI requests. + 2. Neutron supports multiple port binding extension. + 3. Src and Dest host support VIF related PCI allocations. + """ + if self.instance.pci_requests is None or not len( + self.instance.pci_requests.requests): + return + + for pci_request in self.instance.pci_requests.requests: + if pci_request.alias_name is not None: + # allow only VIF related PCI requests in live migration. + # PCI requests come from two sources: instance flavor and + # SR-IOV ports. + # SR-IOV ports pci_request don't have an alias_name. + # TODO(adrianc): add an is_sriov_port property to PCIRequest + # to make this cryptic check clearer (also in resource_tracker) + + raise exception.MigrationPreCheckError( + reason= "non-VIF related PCI requests for instance " + "are not allowed for live migration.") + # All PCI requests are VIF related, now check neutron, + # source and destination compute nodes. 
+ if not self.network_api.supports_port_binding_extension( + self.context): + raise exception.MigrationPreCheckError( + reason="Cannot live migrate VIF with related PCI, Neutron " + "does not support required port binding extension.") + if not (supports_vif_related_pci_allocations(self.context, + src_host) and + supports_vif_related_pci_allocations(self.context, + dest_host)): + raise exception.MigrationPreCheckError( + reason="Cannot live migrate VIF with related PCI, " + "source and destination nodes do not support " + "the operation.") + def _check_host_is_up(self, host): service = objects.Service.get_by_compute_host(self.context, host) @@ -265,6 +320,7 @@ class LiveMigrationTask(base.TaskBase): return source_info, destination_info def _call_livem_checks_on_host(self, destination): + self._check_can_migrate_pci(self.source, destination) try: self.migrate_data = self.compute_rpcapi.\ check_can_live_migrate_destination(self.context, self.instance, @@ -280,8 +336,17 @@ class LiveMigrationTask(base.TaskBase): if (self.network_api.supports_port_binding_extension(self.context) and supports_extended_port_binding(self.context, self.source) and supports_extended_port_binding(self.context, destination)): - self.migrate_data.vifs = ( - self._bind_ports_on_destination(destination)) + if 'vifs' not in self.migrate_data: + # migrate data vifs were not constructed in dest compute + # during check_can_live_migrate_destination, construct a + # skeleton to be updated after port binding. 
+ # TODO(adrianc): This can be removed once we move to T release + self.migrate_data.vifs = migrate_data_obj.LiveMigrateData.\ + create_skeleton_migrate_vifs( + self.instance.get_network_info()) + bindings = self._bind_ports_on_destination(destination) + self._update_migrate_vifs_from_bindings(self.migrate_data.vifs, + bindings) def _bind_ports_on_destination(self, destination): LOG.debug('Start binding ports on destination host: %s', destination, @@ -291,23 +356,33 @@ class LiveMigrationTask(base.TaskBase): # that was bound. This information is then stuffed into the # migrate_data. try: + # Note(adrianc): migrate_data.vifs was partially filled + # by destination compute if compute is new enough. + # if that is the case, it may have updated the required port + # profile for the destination node (e.g new PCI address if SR-IOV) + # perform port binding against the requested profile + migrate_vifs_with_profile = [mig_vif for mig_vif in + self.migrate_data.vifs + if 'profile_json' in mig_vif] + + ports_profile = None + if migrate_vifs_with_profile: + # Update to the port profile is required + ports_profile = {mig_vif.port_id: mig_vif.profile + for mig_vif in migrate_vifs_with_profile} + bindings = self.network_api.bind_ports_to_host( - self.context, self.instance, destination) + self.context, self.instance, destination, None, ports_profile) except exception.PortBindingFailed as e: # Port binding failed for that host, try another one. 
raise exception.MigrationPreCheckError( reason=e.format_message()) + return bindings - source_vif_map = { - vif['id']: vif for vif in self.instance.get_network_info() - } - migrate_vifs = [] - for port_id, binding in bindings.items(): - migrate_vif = objects.VIFMigrateData( - port_id=port_id, **binding) - migrate_vif.source_vif = source_vif_map[port_id] - migrate_vifs.append(migrate_vif) - return migrate_vifs + def _update_migrate_vifs_from_bindings(self, migrate_vifs, bindings): + for migrate_vif in migrate_vifs: + for attr_name, attr_val in bindings[migrate_vif.port_id].items(): + setattr(migrate_vif, attr_name, attr_val) def _get_source_cell_mapping(self): """Returns the CellMapping for the cell in which the instance lives diff --git a/nova/network/neutronv2/api.py b/nova/network/neutronv2/api.py index 8128ebf7a2c2..a17932775bd6 100644 --- a/nova/network/neutronv2/api.py +++ b/nova/network/neutronv2/api.py @@ -3335,7 +3335,11 @@ class API(base_api.NetworkAPI): # as in an unshelve operation. 
vnic_type = p.get('binding:vnic_type') if (vnic_type in network_model.VNIC_TYPES_SRIOV - and migration is not None): + and migration is not None + and migration['migration_type'] != + constants.LIVE_MIGRATION): + # Note(adrianc): for live migration binding profile was already + # updated in conductor when calling bind_ports_to_host() if not pci_mapping: pci_mapping = self._get_pci_mapping_for_migration(context, instance, migration) diff --git a/nova/network/neutronv2/constants.py b/nova/network/neutronv2/constants.py index 679ce87d7cc2..bb0269dd2a35 100644 --- a/nova/network/neutronv2/constants.py +++ b/nova/network/neutronv2/constants.py @@ -20,3 +20,4 @@ DNS_INTEGRATION = 'DNS Integration' MULTI_NET_EXT = 'Multi Provider Network' SUBSTR_PORT_FILTERING = 'IP address substring filtering' PORT_BINDING_EXTENDED = 'Port Bindings Extended' +LIVE_MIGRATION = 'live-migration' diff --git a/nova/objects/migrate_data.py b/nova/objects/migrate_data.py index a3ca205cd0a6..7f863ec3817f 100644 --- a/nova/objects/migrate_data.py +++ b/nova/objects/migrate_data.py @@ -120,6 +120,23 @@ class LiveMigrateData(obj_base.NovaObject): 'vifs': fields.ListOfObjectsField('VIFMigrateData'), } + @staticmethod + def create_skeleton_migrate_vifs(vifs): + """Create migrate vifs for live migration. + + :param vifs: a list of VIFs. + :return: list of VIFMigrateData object corresponding to the provided + VIFs. 
+ """ + vif_mig_data = [] + + for vif in vifs: + mig_vif = VIFMigrateData( + port_id=vif['id'], + source_vif=vif) + vif_mig_data.append(mig_vif) + return vif_mig_data + @obj_base.NovaObjectRegistry.register class LibvirtLiveMigrateBDMInfo(obj_base.NovaObject): diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 917ff0fec0ea..87bbf9f6a9fe 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -56,6 +56,7 @@ from nova.objects import fields from nova.objects import instance as instance_obj from nova.objects import migrate_data as migrate_data_obj from nova.objects import network_request as net_req_obj +from nova.pci import request as pci_request from nova import test from nova.tests import fixtures from nova.tests.unit.api.openstack import fakes @@ -2692,7 +2693,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, src_info = 'src_info' dest_info = 'dest_info' dest_check_data = dict(foo='bar') - mig_data = dict(cow='moo') + mig_data = mock.MagicMock() + mig_data.cow = 'moo' with test.nested( mock.patch.object(self.compute, '_get_compute_info'), @@ -2703,9 +2705,16 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, mock.patch.object(self.compute.driver, 'cleanup_live_migration_destination_check'), mock.patch.object(db, 'instance_fault_create'), - mock.patch.object(compute_utils, 'EventReporter') + mock.patch.object(compute_utils, 'EventReporter'), + mock.patch.object(migrate_data_obj.LiveMigrateData, + 'create_skeleton_migrate_vifs'), + mock.patch.object(instance, 'get_network_info'), + mock.patch.object(self.compute, '_claim_pci_for_instance_vifs'), + mock.patch.object(self.compute, + '_update_migrate_vifs_profile_with_pci'), ) as (mock_get, mock_check_dest, mock_check_src, mock_check_clean, - mock_fault_create, mock_event): + mock_fault_create, mock_event, mock_create_mig_vif, + mock_nw_info, mock_claim_pci, mock_update_mig_vif): mock_get.side_effect 
= (src_info, dest_info) mock_check_dest.return_value = dest_check_data @@ -7912,7 +7921,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, _do_test() def test_post_live_migration_at_destination_success(self): - + @mock.patch.object(self.compute, 'rt') @mock.patch.object(self.instance, 'save') @mock.patch.object(self.compute.network_api, 'get_instance_nw_info', return_value='test_network') @@ -7929,7 +7938,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, _get_compute_info, _get_power_state, _get_instance_block_device_info, _notify_about_instance_usage, migrate_instance_finish, - setup_networks_on_host, get_instance_nw_info, save): + setup_networks_on_host, get_instance_nw_info, save, + rt_mock): cn = mock.Mock(spec_set=['hypervisor_hostname']) cn.hypervisor_hostname = 'test_host' @@ -7966,7 +7976,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, migrate_instance_finish.assert_called_once_with( self.context, self.instance, {'source_compute': cn_old, - 'dest_compute': self.compute.host}) + 'dest_compute': self.compute.host, + 'migration_type': 'live-migration'}) _get_instance_block_device_info.assert_called_once_with( self.context, self.instance ) @@ -7976,6 +7987,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, self.instance) _get_compute_info.assert_called_once_with(self.context, self.compute.host) + rt_mock.allocate_pci_devices_for_instance.assert_called_once_with( + self.context, self.instance) self.assertEqual(self.compute.host, self.instance.host) self.assertEqual('test_host', self.instance.node) @@ -7988,7 +8001,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, _do_test() def test_post_live_migration_at_destination_compute_not_found(self): - + @mock.patch.object(self.compute, 'rt') @mock.patch.object(self.instance, 'save') @mock.patch.object(self.compute, 'network_api') @mock.patch.object(self.compute, '_notify_about_instance_usage') @@ -8003,7 +8016,8 @@ class 
ComputeManagerMigrationTestCase(test.NoDBTestCase, def _do_test(mock_notify, post_live_migration_at_destination, _get_compute_info, _get_power_state, _get_instance_block_device_info, - _notify_about_instance_usage, network_api, save): + _notify_about_instance_usage, network_api, + save, rt_mock): cn = mock.Mock(spec_set=['hypervisor_hostname']) cn.hypervisor_hostname = 'test_host' _get_compute_info.return_value = cn @@ -8021,6 +8035,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, def test_post_live_migration_at_destination_unexpected_exception(self): + @mock.patch.object(self.compute, 'rt') @mock.patch.object(compute_utils, 'add_instance_fault_from_exc') @mock.patch.object(self.instance, 'save') @mock.patch.object(self.compute, 'network_api') @@ -8034,7 +8049,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, def _do_test(post_live_migration_at_destination, _get_compute_info, _get_power_state, _get_instance_block_device_info, _notify_about_instance_usage, network_api, save, - add_instance_fault_from_exc): + add_instance_fault_from_exc, rt_mock): cn = mock.Mock(spec_set=['hypervisor_hostname']) cn.hypervisor_hostname = 'test_host' _get_compute_info.return_value = cn @@ -8053,6 +8068,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, """Tests that neutron fails to delete the source host port bindings but we handle the error and just log it. """ + @mock.patch.object(self.compute, 'rt') @mock.patch.object(self.compute, '_notify_about_instance_usage') @mock.patch.object(self.compute, 'network_api') @mock.patch.object(self.compute, '_get_instance_block_device_info') @@ -8063,7 +8079,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, hypervisor_hostname='fake-dest-host')) @mock.patch.object(self.instance, 'save') def _do_test(instance_save, get_compute_node, get_power_state, - get_bdms, network_api, legacy_notify): + get_bdms, network_api, legacy_notify, rt_mock): # setup_networks_on_host is called three times: # 1. 
set the migrating_to port binding profile value (no-op) # 2. delete the source host port bindings - we make this raise @@ -8095,8 +8111,12 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, 'foo', *args, source_bdms=bdms, **kwargs) + + mock_rt = self._mock_rt() result = _do_call() mock_clean.assert_called_once_with(self.context, self.instance.uuid) + mock_rt.free_pci_device_allocations_for_instance.\ + assert_called_once_with(self.context, self.instance) return result def test_post_live_migration_new_allocations(self): @@ -8125,8 +8145,6 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, def test_post_live_migration_cinder_v3_api(self): # Because live migration has succeeded, _post_live_migration # should call attachment_delete with the original/old attachment_id - compute = manager.ComputeManager() - dest_host = 'test_dest_host' instance = fake_instance.fake_instance_obj(self.context, node='dest', @@ -8159,26 +8177,29 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, 'clean_console_auths_for_instance') @mock.patch.object(migrate_data.migration, 'save', new=lambda: None) - @mock.patch.object(compute.reportclient, + @mock.patch.object(self.compute.reportclient, 'get_allocations_for_consumer_by_provider') @mock.patch.object(vol_bdm, 'save') - @mock.patch.object(compute, 'update_available_resource') - @mock.patch.object(compute.volume_api, 'attachment_delete') - @mock.patch.object(compute, '_get_instance_block_device_info') - @mock.patch.object(compute, 'compute_rpcapi') - @mock.patch.object(compute, 'driver') - @mock.patch.object(compute, '_notify_about_instance_usage') - @mock.patch.object(compute, 'network_api') + @mock.patch.object(self.compute, 'update_available_resource') + @mock.patch.object(self.compute.volume_api, 'attachment_delete') + @mock.patch.object(self.compute, '_get_instance_block_device_info') + @mock.patch.object(self.compute, 'compute_rpcapi') + @mock.patch.object(self.compute, 'driver') + 
@mock.patch.object(self.compute, '_notify_about_instance_usage') + @mock.patch.object(self.compute, 'network_api') def _test(mock_net_api, mock_notify, mock_driver, mock_rpc, mock_get_bdm_info, mock_attach_delete, mock_update_resource, mock_bdm_save, mock_ga, mock_clean, mock_notify_action): self._mock_rt() + bdms = objects.BlockDeviceMappingList(objects=[vol_bdm, image_bdm]) - compute._post_live_migration(self.context, instance, dest_host, - migrate_data=migrate_data, - source_bdms=bdms) + self.compute._post_live_migration(self.context, + instance, + dest_host, + migrate_data=migrate_data, + source_bdms=bdms) mock_attach_delete.assert_called_once_with( self.context, uuids.attachment) @@ -8357,12 +8378,14 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, """Tests that neutron fails to delete the destination host port bindings but we handle the error and just log it. """ + @mock.patch.object(self.compute, 'rt') @mock.patch.object(self.compute, '_notify_about_instance_usage') @mock.patch.object(self.compute, 'network_api') @mock.patch.object(self.compute, '_get_instance_block_device_info') @mock.patch.object(self.compute.driver, 'rollback_live_migration_at_destination') - def _do_test(driver_rollback, get_bdms, network_api, legacy_notify): + def _do_test(driver_rollback, get_bdms, network_api, legacy_notify, + rt_mock): self.compute.network_api.setup_networks_on_host.side_effect = ( exception.PortBindingDeletionFailed( port_id=uuids.port_id, host='fake-dest-host')) @@ -8377,6 +8400,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, network_api.get_instance_nw_info.return_value, get_bdms.return_value, destroy_disks=False, migrate_data=mock.sentinel.migrate_data) + rt_mock.free_pci_device_claims_for_instance.\ + assert_called_once_with(self.context, self.instance) _do_test() @@ -8612,6 +8637,111 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, doit() + def test__claim_pci_for_instance_vifs(self): + @mock.patch.object(self.compute, 'rt') 
+ @mock.patch.object(pci_request, 'get_instance_pci_request_from_vif') + @mock.patch.object(self.instance, 'get_network_info') + def _test(mock_get_network_info, + mock_get_instance_pci_request_from_vif, + rt_mock): + # when no VIFS, expecting no pci devices to be claimed + mock_get_network_info.return_value = [] + port_id_to_pci = self.compute._claim_pci_for_instance_vifs( + self.context, + self.instance) + rt_mock.claim_pci_devices.assert_not_called() + self.assertEqual(0, len(port_id_to_pci)) + # when there are VIFs, expect only ones with related PCI to be + # claimed and their migrate vif profile to be updated. + + # Mock needed objects + nw_vifs = network_model.NetworkInfo([ + network_model.VIF( + id=uuids.port0, + vnic_type='direct', + type=network_model.VIF_TYPE_HW_VEB, + profile={'pci_slot': '0000:04:00.3', + 'pci_vendor_info': '15b3:1018', + 'physical_network': 'default'}), + network_model.VIF( + id=uuids.port1, + vnic_type='normal', + type=network_model.VIF_TYPE_OVS, + profile={'some': 'attribute'})]) + mock_get_network_info.return_value = nw_vifs + + pci_req = objects.InstancePCIRequest(request_id=uuids.pci_req) + instance_pci_reqs = objects.InstancePCIRequests(requests=[pci_req]) + instance_pci_devs = objects.PciDeviceList( + objects=[objects.PciDevice(request_id=uuids.pci_req, + address='0000:04:00.3', + vendor_id='15b3', + product_id='1018')]) + + def get_pci_req_side_effect(context, instance, vif): + if vif['id'] == uuids.port0: + return pci_req + return None + + mock_get_instance_pci_request_from_vif.side_effect = \ + get_pci_req_side_effect + self.instance.pci_devices = instance_pci_devs + self.instance.pci_requests = instance_pci_reqs + + rt_mock.reset_mock() + claimed_pci_dev = objects.PciDevice(request_id=uuids.pci_req, + address='0000:05:00.4', + vendor_id='15b3', + product_id='1018') + rt_mock.claim_pci_devices.return_value = [claimed_pci_dev] + + # Do the actual test + port_id_to_pci = self.compute._claim_pci_for_instance_vifs( + self.context, + 
self.instance) + self.assertEqual(len(nw_vifs), + mock_get_instance_pci_request_from_vif.call_count) + self.assertTrue(rt_mock.claim_pci_devices.called) + self.assertEqual(len(port_id_to_pci), 1) + + _test() + + def test__update_migrate_vifs_profile_with_pci(self): + # Define two migrate vifs with only one pci that is required + # to be updated. Make sure method under test updated the correct one + nw_vifs = network_model.NetworkInfo( + [network_model.VIF( + id=uuids.port0, + vnic_type='direct', + type=network_model.VIF_TYPE_HW_VEB, + profile={'pci_slot': '0000:04:00.3', + 'pci_vendor_info': '15b3:1018', + 'physical_network': 'default'}), + network_model.VIF( + id=uuids.port1, + vnic_type='normal', + type=network_model.VIF_TYPE_OVS, + profile={'some': 'attribute'})]) + pci_dev = objects.PciDevice(request_id=uuids.pci_req, + address='0000:05:00.4', + vendor_id='15b3', + product_id='1018') + port_id_to_pci_dev = {uuids.port0: pci_dev} + mig_vifs = migrate_data_obj.LiveMigrateData.\ + create_skeleton_migrate_vifs(nw_vifs) + self.compute._update_migrate_vifs_profile_with_pci(mig_vifs, + port_id_to_pci_dev) + # Make sure method under test updated the correct one. + changed_mig_vif = mig_vifs[0] + unchanged_mig_vif = mig_vifs[1] + # Migrate vifs profile was updated with pci_dev.address + # for port ID uuids.port0. + self.assertEqual(changed_mig_vif.profile['pci_slot'], + pci_dev.address) + # Migrate vifs profile was unchanged for port ID uuids.port1. + # i.e 'profile' attribute does not exist. 
+ self.assertNotIn('profile', unchanged_mig_vif) + class ComputeManagerInstanceUsageAuditTestCase(test.TestCase): def setUp(self): diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index adcff68c65dd..d644a54ceaf9 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -3403,3 +3403,48 @@ class OverCommitTestCase(BaseTestCase): def test_disk_allocation_ratio_none_negative(self): self.assertRaises(ValueError, CONF.set_default, 'disk_allocation_ratio', -1.0) + + +class TestPciTrackerDelegationMethods(BaseTestCase): + + def setUp(self): + super(TestPciTrackerDelegationMethods, self).setUp() + self._setup_rt() + self.rt.pci_tracker = mock.MagicMock() + self.context = context.RequestContext(mock.sentinel.user_id, + mock.sentinel.project_id) + self.instance = _INSTANCE_FIXTURES[0].obj_clone() + + def test_claim_pci_devices(self): + request = objects.InstancePCIRequest( + count=1, + spec=[{'vendor_id': 'v', 'product_id': 'p'}]) + pci_requests = objects.InstancePCIRequests( + requests=[request], + instance_uuid=self.instance.uuid) + self.rt.claim_pci_devices(self.context, pci_requests) + self.rt.pci_tracker.claim_instance.assert_called_once_with( + self.context, pci_requests, None) + self.assertTrue(self.rt.pci_tracker.save.called) + + def test_allocate_pci_devices_for_instance(self): + self.rt.allocate_pci_devices_for_instance(self.context, self.instance) + self.rt.pci_tracker.allocate_instance.assert_called_once_with( + self.instance) + self.assertTrue(self.rt.pci_tracker.save.called) + + def test_free_pci_device_allocations_for_instance(self): + self.rt.free_pci_device_allocations_for_instance(self.context, + self.instance) + self.rt.pci_tracker.free_instance_allocations.assert_called_once_with( + self.context, + self.instance) + self.assertTrue(self.rt.pci_tracker.save.called) + + def test_free_pci_device_claims_for_instance(self): + 
self.rt.free_pci_device_claims_for_instance(self.context, + self.instance) + self.rt.pci_tracker.free_instance_claims.assert_called_once_with( + self.context, + self.instance) + self.assertTrue(self.rt.pci_tracker.save.called) diff --git a/nova/tests/unit/conductor/tasks/test_live_migrate.py b/nova/tests/unit/conductor/tasks/test_live_migrate.py index 8e9111bc3356..a24b4b88dca2 100644 --- a/nova/tests/unit/conductor/tasks/test_live_migrate.py +++ b/nova/tests/unit/conductor/tasks/test_live_migrate.py @@ -265,9 +265,11 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): mock_get_info.return_value = hypervisor_details mock_check.return_value = "migrate_data" - with mock.patch.object(self.task.network_api, - 'supports_port_binding_extension', - return_value=False): + with test.nested( + mock.patch.object(self.task.network_api, + 'supports_port_binding_extension', + return_value=False), + mock.patch.object(self.task, '_check_can_migrate_pci')): self.assertEqual((hypervisor_details, hypervisor_details), self.task._check_requested_destination()) self.assertEqual("migrate_data", self.task.migrate_data) @@ -371,9 +373,11 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): mock_get_info.return_value = hypervisor_details mock_check.return_value = "migrate_data" - with mock.patch.object(self.task.network_api, - 'supports_port_binding_extension', - return_value=False): + with test.nested( + mock.patch.object(self.task.network_api, + 'supports_port_binding_extension', + return_value=False), + mock.patch.object(self.task, '_check_can_migrate_pci')): ex = self.assertRaises(exception.MigrationPreCheckError, self.task._check_requested_destination) self.assertIn('across cells', six.text_type(ex)) @@ -569,11 +573,13 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): self.task._find_destination) def test_call_livem_checks_on_host(self): - with mock.patch.object(self.task.compute_rpcapi, - 'check_can_live_migrate_destination', - side_effect=messaging.MessagingTimeout): + with 
test.nested( + mock.patch.object(self.task.compute_rpcapi, + 'check_can_live_migrate_destination', + side_effect=messaging.MessagingTimeout), + mock.patch.object(self.task, '_check_can_migrate_pci')): self.assertRaises(exception.MigrationPreCheckError, - self.task._call_livem_checks_on_host, {}) + self.task._call_livem_checks_on_host, {}) @mock.patch('nova.conductor.tasks.live_migrate.' 'supports_extended_port_binding', return_value=True) @@ -584,6 +590,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): uuids.port2: {'host': 'dest-host'} } + @mock.patch.object(self.task, '_check_can_migrate_pci') @mock.patch.object(self.task.compute_rpcapi, 'check_can_live_migrate_destination', return_value=data) @@ -592,8 +599,10 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): return_value=True) @mock.patch.object(self.task.network_api, 'bind_ports_to_host', return_value=bindings) - def _test(mock_bind_ports_to_host, mock_supports_port_binding, - mock_check_can_live_migrate_dest): + def _test(mock_bind_ports_to_host, + mock_supports_port_binding, + mock_check_can_live_migrate_dest, + mock_check_can_migrate_pci): nwinfo = network_model.NetworkInfo([ network_model.VIF(uuids.port1), network_model.VIF(uuids.port2)]) @@ -646,3 +655,56 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): """ self.task._remove_host_allocations('host', 'node') remove_provider.assert_not_called() + + def test_check_can_migrate_pci(self): + """Tests that _check_can_migrate_pci() allows live-migration if + instance does not contain non-network related PCI requests and + raises MigrationPreCheckError otherwise + """ + + @mock.patch.object(self.task.network_api, + 'supports_port_binding_extension') + @mock.patch.object(live_migrate, + 'supports_vif_related_pci_allocations') + def _test(instance_pci_reqs, + supp_binding_ext_retval, + supp_vif_related_pci_alloc_retval, + mock_supp_vif_related_pci_alloc, + mock_supp_port_binding_ext): + mock_supp_vif_related_pci_alloc.return_value = \ + 
supp_vif_related_pci_alloc_retval + mock_supp_port_binding_ext.return_value = \ + supp_binding_ext_retval + self.task.instance.pci_requests = instance_pci_reqs + self.task._check_can_migrate_pci("Src", "Dst") + # in case we managed to get away without rasing, check mocks + mock_supp_port_binding_ext.called_once_with(self.context) + if instance_pci_reqs: + self.assertTrue(mock_supp_vif_related_pci_alloc.called) + + # instance has no PCI requests + _test(None, False, False) # No support in Neutron and Computes + _test(None, True, False) # No support in Computes + _test(None, False, True) # No support in Neutron + _test(None, True, True) # Support in both Neutron and Computes + # instance contains network related PCI requests (alias_name=None) + pci_requests = objects.InstancePCIRequests( + requests=[objects.InstancePCIRequest(alias_name=None)]) + self.assertRaises(exception.MigrationPreCheckError, + _test, pci_requests, False, False) + self.assertRaises(exception.MigrationPreCheckError, + _test, pci_requests, True, False) + self.assertRaises(exception.MigrationPreCheckError, + _test, pci_requests, False, True) + _test(pci_requests, True, True) + # instance contains Non network related PCI requests (alias_name!=None) + pci_requests.requests.append( + objects.InstancePCIRequest(alias_name="non-network-related-pci")) + self.assertRaises(exception.MigrationPreCheckError, + _test, pci_requests, False, False) + self.assertRaises(exception.MigrationPreCheckError, + _test, pci_requests, True, False) + self.assertRaises(exception.MigrationPreCheckError, + _test, pci_requests, False, True) + self.assertRaises(exception.MigrationPreCheckError, + _test, pci_requests, True, True) diff --git a/nova/tests/unit/network/test_neutronv2.py b/nova/tests/unit/network/test_neutronv2.py index 3d634424b0ea..90470cdb2de5 100644 --- a/nova/tests/unit/network/test_neutronv2.py +++ b/nova/tests/unit/network/test_neutronv2.py @@ -4229,7 +4229,8 @@ class TestNeutronv2WithMock(TestNeutronv2Base): 
'pci_vendor_info': 'old_pci_vendor_info'}}, {'id': 'fake-port-2', neutronapi.BINDING_HOST_ID: instance.host}]} - migration = {'status': 'confirmed'} + migration = {'status': 'confirmed', + 'migration_type': "migration"} list_ports_mock = mock.Mock(return_value=fake_ports) get_client_mock.return_value.list_ports = list_ports_mock @@ -4284,7 +4285,8 @@ class TestNeutronv2WithMock(TestNeutronv2Base): {'pci_slot': '0000:0a:00.1', 'physical_network': 'old_phys_net', 'pci_vendor_info': 'old_pci_vendor_info'}}]} - migration = {'status': 'confirmed'} + migration = {'status': 'confirmed', + 'migration_type': "migration"} list_ports_mock = mock.Mock(return_value=fake_ports) get_client_mock.return_value.list_ports = list_ports_mock @@ -4416,6 +4418,51 @@ class TestNeutronv2WithMock(TestNeutronv2Base): instance.availability_zone }}) + @mock.patch.object(neutronapi.API, '_get_pci_mapping_for_migration') + @mock.patch.object(pci_whitelist.Whitelist, 'get_devspec') + @mock.patch.object(neutronapi, 'get_client', return_value=mock.Mock()) + def test_update_port_bindings_for_instance_with_live_migration( + self, + get_client_mock, + get_devspec_mock, + get_pci_mapping_mock): + + devspec = mock.Mock() + devspec.get_tags.return_value = {'physical_network': 'physnet1'} + get_devspec_mock.return_value = devspec + + instance = fake_instance.fake_instance_obj(self.context) + fake_ports = {'ports': [ + {'id': 'fake-port-1', + 'binding:vnic_type': 'direct', + neutronapi.BINDING_HOST_ID: 'old-host', + neutronapi.BINDING_PROFILE: + {'pci_slot': '0000:0a:00.1', + 'physical_network': 'phys_net', + 'pci_vendor_info': 'vendor_info'}}]} + migration = {'status': 'confirmed', + 'migration_type': "live-migration"} + list_ports_mock = mock.Mock(return_value=fake_ports) + get_client_mock.return_value.list_ports = list_ports_mock + update_port_mock = mock.Mock() + get_client_mock.return_value.update_port = update_port_mock + + self.api._update_port_binding_for_instance(self.context, instance, + 
'new-host', migration) + # Assert _get_pci_mapping_for_migration was not called + self.assertFalse(get_pci_mapping_mock.called) + + # Assert that update_port() does not update binding:profile + # and that it updates host ID + called_port_id = update_port_mock.call_args[0][0] + called_port_attributes = update_port_mock.call_args[0][1] + self.assertEqual(called_port_id, fake_ports['ports'][0]['id']) + self.assertNotIn( + neutronapi.BINDING_PROFILE, called_port_attributes['port']) + self.assertEqual( + called_port_attributes['port'][neutronapi.BINDING_HOST_ID], + 'new-host') + def test_get_pci_mapping_for_migration(self): instance = fake_instance.fake_instance_obj(self.context) instance.migration_context = objects.MigrationContext() diff --git a/nova/tests/unit/objects/test_migrate_data.py b/nova/tests/unit/objects/test_migrate_data.py index 07fc30674469..06ab07216db8 100644 --- a/nova/tests/unit/objects/test_migrate_data.py +++ b/nova/tests/unit/objects/test_migrate_data.py @@ -44,7 +44,15 @@ class _TestLiveMigrateData(object): class TestLiveMigrateData(test_objects._LocalTest, _TestLiveMigrateData): - pass + def test_create_skeleton_migrate_vifs(self): + vifs = [ + network_model.VIF(id=uuids.port1), + network_model.VIF(id=uuids.port2)] + mig_vifs = migrate_data.LiveMigrateData.create_skeleton_migrate_vifs( + vifs) + self.assertEqual(len(vifs), len(mig_vifs)) + self.assertEqual([vif['id'] for vif in vifs], + [mig_vif.port_id for mig_vif in mig_vifs]) class TestRemoteLiveMigrateData(test_objects._RemoteTest, diff --git a/releasenotes/notes/live-migration-with-PCI-device-b96bdad273fa1d2b.yaml b/releasenotes/notes/live-migration-with-PCI-device-b96bdad273fa1d2b.yaml new file mode 100644 index 000000000000..6a200a6780c7 --- /dev/null +++ b/releasenotes/notes/live-migration-with-PCI-device-b96bdad273fa1d2b.yaml @@ -0,0 +1,17 @@ +--- +upgrade: + - | + Live migration of an instance with PCI devices is now blocked + in the following scenarios: + + 1. 
Instance with non-network related PCI device. + 2. Instance with network related PCI device and either: + a. Neutron does not support extended port binding API. + b. Source or destination compute node does not support + libvirt-sriov-live-migration. + + Live migration will fail with a user-friendly error. + + .. note:: Previously, the operation would have failed with an obscure + error, leaving the instance still running on the source node or + in an inoperable state.