diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 72e88ce6e6e0..281f2733aab7 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -5654,6 +5654,14 @@ class ComputeManager(manager.Manager):
 
             instance.host = migration.dest_compute
             instance.node = migration.dest_node
+            # NOTE(gibi): as the instance is now tracked on the destination
+            # we have to make sure that the source compute resource tracker
+            # can still track this instance as a migration. For that the
+            # resource tracker needs to see the old_flavor set on the
+            # instance. The old_flavor used to be set on the destination host
+            # in finish_resize, but that is racy with a source host
+            # update_available_resource periodic run.
+            instance.old_flavor = instance.flavor
             instance.task_state = task_states.RESIZE_MIGRATED
             instance.save(expected_task_state=task_states.RESIZE_MIGRATING)
 
@@ -5767,6 +5775,10 @@ class ComputeManager(manager.Manager):
         # to ACTIVE for backwards compatibility
         old_vm_state = instance.system_metadata.get('old_vm_state',
                                                     vm_states.ACTIVE)
+        # NOTE(gibi): this is already set by resize_instance on the source
+        # node before finish_resize is called on the destination, but during
+        # an upgrade the source node might not have the fix for bug 1944759
+        # yet. This assignment can be removed in the Z release.
         instance.old_flavor = old_flavor
 
         if old_instance_type_id != new_instance_type_id:
diff --git a/nova/tests/functional/libvirt/test_numa_servers.py b/nova/tests/functional/libvirt/test_numa_servers.py
index 90afeb763c06..144bad33c82e 100644
--- a/nova/tests/functional/libvirt/test_numa_servers.py
+++ b/nova/tests/functional/libvirt/test_numa_servers.py
@@ -766,10 +766,10 @@ class NUMAServersTest(NUMAServersTestBase):
 
         dst_host = server['OS-EXT-SRV-ATTR:host']
 
-        # This is a resource accounting bug, we should have 2 cpus pinned on
-        # both computes. The source should have it due to the outbound
-        # migration and the destination due to the instance running there
-        self._assert_pinned_cpus(src_host, 0)
+        # we have 2 cpus pinned on both computes. The source should have it
+        # due to the outbound migration and the destination due to the
+        # instance running there
+        self._assert_pinned_cpus(src_host, 2)
         self._assert_pinned_cpus(dst_host, 2)
 
         return server, src_host, dst_host
@@ -781,30 +781,17 @@ class NUMAServersTest(NUMAServersTestBase):
 
         # Now confirm the resize
         post = {'confirmResize': None}
 
-        # FIXME(gibi): This is bug 1944759 where during resize, on the source
-        # node the resize_instance() call at the point of calling finish_resize
-        # overlaps with a update_available_resources() periodic job. This
-        # causes that the periodic job will not track the migration nor the
-        # instance and therefore freeing the resource allocation. Then when
-        # later the resize is confirmed the confirm_resize on the source
-        # compute also wants to free up the resources, the pinned CPUs, and it
-        # fails as they are already freed.
-        exc = self.assertRaises(
-            client.OpenStackApiException,
-            self.api.post_server_action, server['id'], post
-        )
-        self.assertEqual(500, exc.response.status_code)
-        self.assertIn('CPUUnpinningInvalid', str(exc))
+        self.api.post_server_action(server['id'], post)
+        self._wait_for_state_change(server, 'ACTIVE')
 
-        # confirm failed above but the resource allocation reflects that the
-        # VM is running on the dest node
+        # the resource allocation reflects that the VM is running on the dest
+        # node
         self._assert_pinned_cpus(src_host, 0)
         self._assert_pinned_cpus(dst_host, 2)
 
+        # and running periodics does not break it either
         self._run_periodics()
 
-        # and such allocation situation is stable so as a recovery the VM
-        # can be reset-state to ACTIVE without problem.
         self._assert_pinned_cpus(src_host, 0)
         self._assert_pinned_cpus(dst_host, 2)
 
@@ -820,15 +807,14 @@ class NUMAServersTest(NUMAServersTestBase):
 
         self.api.post_server_action(server['id'], post)
         self._wait_for_state_change(server, 'ACTIVE')
 
-        # This is a resource accounting bug. After the revert the source host
-        # should have 2 cpus pinned due to the instance.
-        self._assert_pinned_cpus(src_host, 0)
+        # After the revert the source host should have 2 cpus pinned due to
+        # the instance.
+        self._assert_pinned_cpus(src_host, 2)
         self._assert_pinned_cpus(dst_host, 0)
 
-        # running the periodic job will fix the resource accounting
+        # running the periodic job will not break it either
         self._run_periodics()
 
-        # this is now correct
         self._assert_pinned_cpus(src_host, 2)
         self._assert_pinned_cpus(dst_host, 0)
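
The test changes above rely on the idea that, once old_flavor is set on the source host before the instance record is handed over to the destination, the source resource tracker keeps accounting the outbound migration during its periodic run. Below is a minimal, hypothetical sketch of that accounting; FakeResourceTracker, the dict-based instance and migration records, and the field names are invented for illustration and do not reflect Nova's actual ResourceTracker API.

class FakeResourceTracker:
    """Toy model of a per-host periodic resource accounting pass."""

    def __init__(self, hostname):
        self.hostname = hostname
        self.pinned_cpus_in_use = 0

    def update_available_resource(self, instances, migrations):
        """Periodic task: rebuild usage from scratch on every run."""
        self.pinned_cpus_in_use = 0
        for instance in instances:
            # An instance assigned to this host is tracked via its current
            # flavor.
            if instance['host'] == self.hostname:
                self.pinned_cpus_in_use += instance['flavor']['vcpus']
        for migration in migrations:
            instance = migration['instance']
            # An outbound resize still holds resources on the source host,
            # but only old_flavor says how much.
            if (migration['source_compute'] == self.hostname
                    and instance['host'] != self.hostname):
                old_flavor = instance.get('old_flavor')
                if old_flavor is None:
                    # Without old_flavor the periodic silently drops the
                    # source-side usage (the behaviour seen in bug 1944759).
                    continue
                self.pinned_cpus_in_use += old_flavor['vcpus']


# With the fix, resize_instance sets old_flavor before the instance record
# points at the destination, so the source periodic keeps reporting 2 pinned
# CPUs instead of dropping to 0.
tracker = FakeResourceTracker('src')
instance = {
    'host': 'dst',               # already points at the destination
    'flavor': {'vcpus': 2},
    'old_flavor': {'vcpus': 2},  # set early by the fixed resize_instance
}
migration = {'source_compute': 'src', 'instance': instance}
tracker.update_available_resource([instance], [migration])
assert tracker.pinned_cpus_in_use == 2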