Add recreate test for evacuate claim failure

This adds a functional recreate test for when the MoveClaim
fails on the destination node and the allocation on the
destination node is not cleaned up. This is because the
MoveClaim fails in its constructor, so it never exits the
Claim context manager to call drop_move_claim, which
would remove the destination node allocation.

Eventually we'll have to manually remove the destination
node allocation in the ComputeManager.rebuild_instance method.

Change-Id: I8900ace4436c4837beb8b4eb1e1d05905efc6dce
Related-Bug: #1713786
(cherry picked from commit 6ed80ddcdd)
This commit is contained in:
Matt Riedemann 2017-08-31 22:47:07 -04:00
parent 2115094824
commit d0375c2f9e
1 changed files with 89 additions and 0 deletions

View File

@@ -1740,6 +1740,95 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
self._delete_and_check_allocations(
server, source_rp_uuid, dest_rp_uuid)
def test_evacuate_claim_on_dest_fails(self):
    """Recreate bug 1713786: a failed evacuate claim leaks allocations.

    Boots a server on compute1, stops and forces down that service, then
    evacuates the server while ``nova.compute.claims.MoveClaim`` is
    patched to raise ``ComputeResourcesUnavailable``. The server is
    expected to end up in ERROR state.

    Once the bug is fixed the destination node allocation should be
    removed; for now this test asserts that it is still present next to
    the source node allocation, both while the source compute is down and
    after it has been started again.
    """
    src_host = self.compute1.host
    dst_host = self.compute2.host
    dst_rp = self._get_provider_uuid_by_host(dst_host)

    server = self._boot_and_check_allocations(self.flavor1, src_host)

    src_services = self.admin_api.get_services(
        host=src_host, binary='nova-compute')
    src_service_id = src_services[0]['id']

    self.compute1.stop()
    # Force the service down rather than waiting for the service group
    # to time out.
    self.admin_api.put_service(src_service_id, {'forced_down': 'true'})

    # NOTE(mriedem): This isn't great. Ideally the driver would be faked
    # so the claim fails on its own, e.g. by reporting a too high
    # memory_mb overhead, but the limits dict handed to the claim is
    # empty, so the claim test treats it as unlimited and never actually
    # performs a claim test. Configuring the scheduler to use the
    # RamFilter to get a memory_mb limit seems like it should work, but
    # it doesn't appear to for some reason...
    def failing_move_claim(*args, **kwargs):
        # The destination node allocation must already exist by the time
        # the claim is attempted.
        usages = self._get_provider_usages(dst_rp)
        self.assertFlavorMatchesAllocation(self.flavor1, usages)
        raise exception.ComputeResourcesUnavailable(
            reason='test_evacuate_claim_on_dest_fails')

    # NOTE(review): source indentation was lost; both statements are kept
    # under the patch so the fake claim stays installed until the server
    # has reached ERROR -- confirm against the upstream file.
    with mock.patch('nova.compute.claims.MoveClaim', failing_move_claim):
        # Evacuate the server.
        self.api.post_server_action(server['id'], {'evacuate': {}})
        # The migration fails on the dest node and the instance goes
        # into ERROR state.
        server = self._wait_for_state_change(self.api, server, 'ERROR')

    # Run the periodics to show those don't modify allocations.
    self._run_periodics()

    # The source node is still down, so its allocation should still be
    # there; the destination node allocation should be cleaned up.
    src_rp = self._get_provider_uuid_by_host(src_host)
    self.assertFlavorMatchesAllocation(
        self.flavor1, self._get_provider_usages(src_rp))

    dst_usages = self._get_provider_usages(dst_rp)
    # FIXME(mriedem): This is bug 1713786 where the claim fails and the
    # dest node allocation isn't cleaned up. Uncomment when fixed.
    # self.assertFlavorMatchesAllocation(
    #     {'vcpus': 0, 'ram': 0, 'disk': 0}, dst_usages)
    self.assertFlavorMatchesAllocation(self.flavor1, dst_usages)

    def assert_server_allocated_on_both_nodes():
        # Check the server's own allocations, source provider first and
        # destination provider second.
        by_provider = self._get_allocations_by_server_uuid(server['id'])
        # FIXME(mriedem): Uncomment when bug 1713786 is fixed.
        # self.assertEqual(1, len(by_provider))
        self.assertEqual(2, len(by_provider))
        for rp_uuid in (src_rp, dst_rp):
            resources = by_provider[rp_uuid]['resources']
            self.assertFlavorMatchesAllocation(self.flavor1, resources)

    assert_server_allocated_on_both_nodes()

    # Start up the source compute.
    self.compute1.start()
    self.admin_api.put_service(src_service_id, {'forced_down': 'false'})

    # Run the periodics again to show they don't change anything.
    self._run_periodics()

    # The source compute shouldn't have cleaned up the allocation for
    # itself since the instance didn't move.
    self.assertFlavorMatchesAllocation(
        self.flavor1, self._get_provider_usages(src_rp))

    assert_server_allocated_on_both_nodes()
def _boot_then_shelve_and_check_allocations(self, hostname, rp_uuid):
# avoid automatic shelve offloading
self.flags(shelved_offload_time=-1)