Merge "Store old_flavor already on source host during resize"

This commit is contained in:
Zuul
2021-09-30 20:07:23 +00:00
committed by Gerrit Code Review
2 changed files with 25 additions and 27 deletions

View File

@@ -5635,6 +5635,14 @@ class ComputeManager(manager.Manager):
instance.host = migration.dest_compute
instance.node = migration.dest_node
# NOTE(gibi): as the instance is now tracked on the destination we
# have to make sure that the source compute resource tracker can
# track this instance as a migration. For that the resource tracker
# needs to see the old_flavor set on the instance. The old_flavor
# setting used to be done on the destination host in finish_resize
# but that is racy with a source host update_available_resource
# periodic run.
instance.old_flavor = instance.flavor
instance.task_state = task_states.RESIZE_MIGRATED
instance.save(expected_task_state=task_states.RESIZE_MIGRATING)
@@ -5748,6 +5756,10 @@ class ComputeManager(manager.Manager):
# to ACTIVE for backwards compatibility
old_vm_state = instance.system_metadata.get('old_vm_state',
vm_states.ACTIVE)
# NOTE(gibi): this is already set by resize_instance on the source
# node before calling finish_resize on the destination, but during an
# upgrade it is possible that the source node does not yet have the
# fix for bug 1944759. This assignment can be removed in the Z release.
instance.old_flavor = old_flavor
if old_instance_type_id != new_instance_type_id:

View File

@@ -877,10 +877,10 @@ class NUMAServersTest(NUMAServersTestBase):
dst_host = server['OS-EXT-SRV-ATTR:host']
# This is a resource accounting bug, we should have 2 cpus pinned on
# both computes. The source should have it due to the outbound
# migration and the destination due to the instance running there
self._assert_pinned_cpus(src_host, 0)
# we have 2 cpus pinned on both computes. The source should have it
# due to the outbound migration and the destination due to the
# instance running there
self._assert_pinned_cpus(src_host, 2)
self._assert_pinned_cpus(dst_host, 2)
return server, src_host, dst_host
@@ -892,30 +892,17 @@ class NUMAServersTest(NUMAServersTestBase):
# Now confirm the resize
post = {'confirmResize': None}
# FIXME(gibi): This is bug 1944759 where, during resize, the
# resize_instance() call on the source node overlaps with an
# update_available_resources() periodic job at the point of calling
# finish_resize. This causes the periodic job to track neither the
# migration nor the instance, and therefore to free the resource
# allocation. Then when the resize is later confirmed, confirm_resize
# on the source compute also tries to free up the resources, the
# pinned CPUs, and fails as they are already freed.
exc = self.assertRaises(
client.OpenStackApiException,
self.api.post_server_action, server['id'], post
)
self.assertEqual(500, exc.response.status_code)
self.assertIn('CPUUnpinningInvalid', str(exc))
self.api.post_server_action(server['id'], post)
self._wait_for_state_change(server, 'ACTIVE')
# confirm failed above but the resource allocation reflects that the
# VM is running on the dest node
# the resource allocation reflects that the VM is running on the dest
# node
self._assert_pinned_cpus(src_host, 0)
self._assert_pinned_cpus(dst_host, 2)
# and running periodics does not break it either
self._run_periodics()
# and such allocation situation is stable so as a recovery the VM
# can be reset-state to ACTIVE without problem.
self._assert_pinned_cpus(src_host, 0)
self._assert_pinned_cpus(dst_host, 2)
@@ -931,15 +918,14 @@ class NUMAServersTest(NUMAServersTestBase):
self.api.post_server_action(server['id'], post)
self._wait_for_state_change(server, 'ACTIVE')
# This is a resource accounting bug. After the revert the source host
# should have 2 cpus pinned due to the instance.
self._assert_pinned_cpus(src_host, 0)
# After the revert the source host should have 2 cpus pinned due to
# the instance.
self._assert_pinned_cpus(src_host, 2)
self._assert_pinned_cpus(dst_host, 0)
# running the periodic job will fix the resource accounting
# running the periodic job will not break it either
self._run_periodics()
# this is now correct
self._assert_pinned_cpus(src_host, 2)
self._assert_pinned_cpus(dst_host, 0)