diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 7529b086ed70..44352909a2a8 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -8699,15 +8699,41 @@ class ComputeManager(manager.Manager): migration, future = ( self._waiting_live_migrations.pop(instance.uuid)) if future and future.cancel(): - # If we got here, we've successfully aborted the queued - # migration and _do_live_migration won't run so we need - # to set the migration status to cancelled and send the - # notification. If Future.cancel() fails, it means - # _do_live_migration is running and the migration status - # is preparing, and _do_live_migration() itself will attempt - # to pop the queued migration, hit a KeyError, and rollback, - # set the migration to cancelled and send the - # live.migration.abort.end notification. + # If we got here, we've successfully dropped a queued + # migration from the queue, so _do_live_migration won't run + # and we only need to revert minor changes introduced by Nova + # control plane (port bindings, resource allocations and + # instance's PCI devices), restore VM's state, set the + # migration's status to cancelled and send the notification. + # If Future.cancel() fails, it means _do_live_migration is + # running and the migration status is preparing, and + # _do_live_migration() itself will attempt to pop the queued + # migration, hit a KeyError, and rollback, set the migration + # to cancelled and send the live.migration.abort.end + # notification. + self._revert_allocation(context, instance, migration) + try: + # This call will delete any inactive destination host + # port bindings. + self.network_api.setup_networks_on_host( + context, instance, host=migration.dest_compute, + teardown=True) + except exception.PortBindingDeletionFailed as e: + # Removing the inactive port bindings from the destination + # host is not critical so just log an error but don't fail. + LOG.error( + 'Network cleanup failed for destination host %s ' + 'during live migration rollback. You may need to ' + 'manually clean up resources in the network service. ' + 'Error: %s', migration.dest_compute, str(e)) + except Exception: + with excutils.save_and_reraise_exception(): + LOG.exception( + 'An error occurred while cleaning up networking ' + 'during live migration rollback.', + instance=instance) + instance.task_state = None + instance.save(expected_task_state=[task_states.MIGRATING]) self._set_migration_status(migration, 'cancelled') except KeyError: migration = objects.Migration.get_by_id(context, migration_id) diff --git a/nova/tests/functional/libvirt/test_live_migration.py b/nova/tests/functional/libvirt/test_live_migration.py index 720f2bcb1ddf..31ff9dfca05b 100644 --- a/nova/tests/functional/libvirt/test_live_migration.py +++ b/nova/tests/functional/libvirt/test_live_migration.py @@ -117,16 +117,12 @@ class LiveMigrationQueuedAbortTestVmStatus(LiveMigrationWithLockBase): '/servers/%s/migrations/%s' % (self.server_b['id'], serverb_migration['id'])) self._wait_for_migration_status(self.server_b, ['cancelled']) - # Unlock live migrations and confirm that server_a becomes - # active again after successful live migration + # Unlock live migrations and confirm that both servers become + # active again after successful (server_a) and aborted + # (server_b) live migrations self.lock_live_migration.release() self._wait_for_state_change(self.server_a, 'ACTIVE') - - # FIXME(artom) Assert the server_b never comes out of 'MIGRATING' - self.assertRaises( - AssertionError, - self._wait_for_state_change, self.server_b, 'ACTIVE') - self._wait_for_state_change(self.server_b, 'MIGRATING') + self._wait_for_state_change(self.server_b, 'ACTIVE') class LiveMigrationQueuedAbortTestLeftoversRemoved(LiveMigrationWithLockBase): @@ -182,18 +178,16 @@ class LiveMigrationQueuedAbortTestLeftoversRemoved(LiveMigrationWithLockBase): '/servers/%s/migrations/%s' % (self.server_b['id'], migration_server_b['id'])) self._wait_for_migration_status(self.server_b, ['cancelled']) - # Unlock live migrations and confirm that server_a becomes - # active again after successful live migration + # Unlock live migrations and confirm that both servers become + # active again after successful (server_a) and aborted + # (server_b) live migrations self.lock_live_migration.release() self._wait_for_state_change(self.server_a, 'ACTIVE') self._wait_for_migration_status(self.server_a, ['completed']) - # FIXME(astupnikov) Assert the server_b never comes out of 'MIGRATING' - # This should be fixed after bug #1949808 is addressed - self._wait_for_state_change(self.server_b, 'MIGRATING') + self._wait_for_state_change(self.server_b, 'ACTIVE') - # FIXME(astupnikov) Because of bug #1960412 allocations for aborted - # queued live migration (server_b) would not be removed. Allocations - # for completed live migration (server_a) should be empty. + # Allocations for both successful (server_a) and aborted queued live + # migration (server_b) should be removed. allocations_server_a_migration = self.placement.get( '/allocations/%s' % migration_server_a['uuid'] ).body['allocations'] @@ -201,17 +195,11 @@ class LiveMigrationQueuedAbortTestLeftoversRemoved(LiveMigrationWithLockBase): allocations_server_b_migration = self.placement.get( '/allocations/%s' % migration_server_b['uuid'] ).body['allocations'] - src_uuid = self.api.api_get( - 'os-hypervisors?hypervisor_hostname_pattern=%s' % - self.src_hostname).body['hypervisors'][0]['id'] - self.assertIn(src_uuid, allocations_server_b_migration) + self.assertEqual({}, allocations_server_b_migration) - # FIXME(astupnikov) Because of bug #1960412 INACTIVE port binding - # on destination host would not be removed when queued live migration - # is aborted, so 2 port bindings would exist for server_b port from - # Neutron's perspective. - # server_a should be migrated to dest compute, server_b should still - # be hosted by src compute. + # INACTIVE port binding on destination host should be removed when + # queued live migration is aborted, so only 1 port binding would + # exist for ports attached to both servers. port_binding_server_a = copy.deepcopy( self.neutron._port_bindings[self.neutron.port_1['id']] ) @@ -220,4 +208,5 @@ class LiveMigrationQueuedAbortTestLeftoversRemoved(LiveMigrationWithLockBase): port_binding_server_b = copy.deepcopy( self.neutron._port_bindings[self.neutron.port_2['id']] ) - self.assertEqual(2, len(port_binding_server_b)) + self.assertEqual(1, len(port_binding_server_b)) + self.assertNotIn('dest', port_binding_server_b) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 836755b4e68d..cd1a9369c4a6 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -10420,19 +10420,34 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, action='live_migration_abort', phase='end')] ) + @mock.patch.object(objects.Instance, 'save') + @mock.patch.object(manager.ComputeManager, '_revert_allocation') @mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage') @mock.patch.object(objects.Migration, 'get_by_id') @mock.patch('nova.compute.utils.notify_about_instance_action') def test_live_migration_abort_queued(self, mock_notify_action, - mock_get_migration, mock_notify): + mock_get_migration, mock_notify, + mock_revert_allocations, + mock_instance_save): instance = objects.Instance(id=123, uuid=uuids.instance) migration = self._get_migration(10, 'queued', 'live-migration') + migration.dest_compute = uuids.dest + migration.dest_node = uuids.dest migration.save = mock.MagicMock() mock_get_migration.return_value = migration fake_future = mock.MagicMock() self.compute._waiting_live_migrations[instance.uuid] = ( migration, fake_future) - self.compute.live_migration_abort(self.context, instance, migration.id) + with mock.patch.object( + self.compute.network_api, + 'setup_networks_on_host') as mock_setup_net: + self.compute.live_migration_abort( + self.context, instance, migration.id) + mock_setup_net.assert_called_once_with( + self.context, instance, host=migration.dest_compute, + teardown=True) + mock_revert_allocations.assert_called_once_with( + self.context, instance, migration) mock_notify.assert_has_calls( [mock.call(self.context, instance, 'live.migration.abort.start'),