From 1563a15c8b4bcf1a602f72a549c8d9c56ed7da4e Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Thu, 14 Nov 2019 11:38:07 -0500 Subject: [PATCH] Add functional recreate test for bug 1852610 It is possible to delete a source compute service which has pending migration-based allocations and servers in VERIFY_RESIZE status. Doing so deletes the compute service and compute node but orphans the source node resource provider along with its resource allocations held by the migration record while there is a pending resized server. This adds a simple cold migrate test which deletes the source compute service while the server is in VERIFY_RESIZE status and then tries to confirm the resize which fails. NOTE(mriedem): A couple of methods are lifted from ServerMovingTests since change Ie991d4b53e9bb5e7ec26da99219178ab7695abf6 is not in Rocky. Change-Id: I644608b4e197ddea31c5f264adb492f2c8931f04 Related-Bug: #1852610 (cherry picked from commit 94d3743b185d22c07504f5d878dff2f9ef42cee3) (cherry picked from commit 28d76cc7ae5c86d251915392b5b961a975b343ae) (cherry picked from commit 7d673872462f53d0ce5e651263253ec4057a2138) --- nova/tests/functional/integrated_helpers.py | 48 +++++++++++++++++++++ nova/tests/functional/test_servers.py | 47 -------------------- nova/tests/functional/wsgi/test_services.py | 35 +++++++++++++++ 3 files changed, 83 insertions(+), 47 deletions(-) diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py index 7c12269495be..b571601f3171 100644 --- a/nova/tests/functional/integrated_helpers.py +++ b/nova/tests/functional/integrated_helpers.py @@ -27,6 +27,7 @@ import nova.conf from nova import context from nova.db import api as db import nova.image.glance +from nova import objects from nova import test from nova.tests import fixtures as nova_fixtures from nova.tests.functional.api import client as api_client @@ -647,3 +648,50 @@ class ProviderUsageBaseTestCase(test.TestCase, InstanceHelperMixin): compute.manager.host) compute.manager.update_available_resource(ctx) LOG.info('Finished with periodics') + + def _move_and_check_allocations(self, server, request, old_flavor, + new_flavor, source_rp_uuid, dest_rp_uuid): + self.api.post_server_action(server['id'], request) + self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE') + + def _check_allocation(): + source_usages = self._get_provider_usages(source_rp_uuid) + self.assertFlavorMatchesAllocation(old_flavor, source_usages) + dest_usages = self._get_provider_usages(dest_rp_uuid) + self.assertFlavorMatchesAllocation(new_flavor, dest_usages) + + # The instance should own the new_flavor allocation against the + # destination host created by the scheduler + allocations = self._get_allocations_by_server_uuid(server['id']) + self.assertEqual(1, len(allocations)) + dest_alloc = allocations[dest_rp_uuid]['resources'] + self.assertFlavorMatchesAllocation(new_flavor, dest_alloc) + + # The migration should own the old_flavor allocation against the + # source host created by conductor + migration_uuid = self.get_migration_uuid_for_instance(server['id']) + allocations = self._get_allocations_by_server_uuid(migration_uuid) + source_alloc = allocations[source_rp_uuid]['resources'] + self.assertFlavorMatchesAllocation(old_flavor, source_alloc) + + # OK, so the move operation has run, but we have not yet confirmed or + # reverted the move operation. Before we run periodics, make sure + # that we have allocations/usages on BOTH the source and the + # destination hosts. + _check_allocation() + self._run_periodics() + _check_allocation() + + # Make sure the RequestSpec.flavor matches the new_flavor. + ctxt = context.get_admin_context() + reqspec = objects.RequestSpec.get_by_instance_uuid(ctxt, server['id']) + self.assertEqual(new_flavor['id'], reqspec.flavor.flavorid) + + def _migrate_and_check_allocations(self, server, flavor, source_rp_uuid, + dest_rp_uuid): + request = { + 'migrate': None + } + self._move_and_check_allocations( + server, request=request, old_flavor=flavor, new_flavor=flavor, + source_rp_uuid=source_rp_uuid, dest_rp_uuid=dest_rp_uuid) diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index 81d7fb385ac4..27d97d10ab2b 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -1766,53 +1766,6 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase): self.compute2.manager.update_available_resource(ctx) LOG.info('Finished with periodics') - def _migrate_and_check_allocations(self, server, flavor, source_rp_uuid, - dest_rp_uuid): - request = { - 'migrate': None - } - self._move_and_check_allocations( - server, request=request, old_flavor=flavor, new_flavor=flavor, - source_rp_uuid=source_rp_uuid, dest_rp_uuid=dest_rp_uuid) - - def _move_and_check_allocations(self, server, request, old_flavor, - new_flavor, source_rp_uuid, dest_rp_uuid): - self.api.post_server_action(server['id'], request) - self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE') - - def _check_allocation(): - source_usages = self._get_provider_usages(source_rp_uuid) - self.assertFlavorMatchesAllocation(old_flavor, source_usages) - dest_usages = self._get_provider_usages(dest_rp_uuid) - self.assertFlavorMatchesAllocation(new_flavor, dest_usages) - - # The instance should own the new_flavor allocation against the - # destination host created by the scheduler - allocations = self._get_allocations_by_server_uuid(server['id']) - self.assertEqual(1, len(allocations)) - dest_alloc = allocations[dest_rp_uuid]['resources'] - self.assertFlavorMatchesAllocation(new_flavor, dest_alloc) - - # The migration should own the old_flavor allocation against the - # source host created by conductor - migration_uuid = self.get_migration_uuid_for_instance(server['id']) - allocations = self._get_allocations_by_server_uuid(migration_uuid) - source_alloc = allocations[source_rp_uuid]['resources'] - self.assertFlavorMatchesAllocation(old_flavor, source_alloc) - - # OK, so the move operation has run, but we have not yet confirmed or - # reverted the move operation. Before we run periodics, make sure - # that we have allocations/usages on BOTH the source and the - # destination hosts. - _check_allocation() - self._run_periodics() - _check_allocation() - - # Make sure the RequestSpec.flavor matches the new_flavor. - ctxt = context.get_admin_context() - reqspec = objects.RequestSpec.get_by_instance_uuid(ctxt, server['id']) - self.assertEqual(new_flavor['id'], reqspec.flavor.flavorid) - def test_resize_revert(self): self._test_resize_revert(dest_hostname='host1') diff --git a/nova/tests/functional/wsgi/test_services.py b/nova/tests/functional/wsgi/test_services.py index 5d97bcc32afa..dd18bec3a478 100644 --- a/nova/tests/functional/wsgi/test_services.py +++ b/nova/tests/functional/wsgi/test_services.py @@ -173,3 +173,38 @@ class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase): log_output = self.stdlog.logger.output self.assertIn('Error updating resources for node host1.', log_output) self.assertIn('Failed to create resource provider host1', log_output) + + def test_migrate_confirm_after_deleted_source_compute(self): + """Tests a scenario where a server is cold migrated and while in + VERIFY_RESIZE status the admin attempts to delete the source compute + and then the user tries to confirm the resize. + """ + # Start a compute service and create a server there. + self._start_compute('host1') + host1_rp_uuid = self._get_provider_uuid_by_host('host1') + flavor = self.api.get_flavors()[0] + server = self._boot_and_check_allocations(flavor, 'host1') + # Start a second compute service so we can cold migrate there. + self._start_compute('host2') + host2_rp_uuid = self._get_provider_uuid_by_host('host2') + # Cold migrate the server to host2. + self._migrate_and_check_allocations( + server, flavor, host1_rp_uuid, host2_rp_uuid) + # Delete the source compute service. + service = self.admin_api.get_services( + binary='nova-compute', host='host1')[0] + self.admin_api.api_delete('/os-services/%s' % service['id']) + # FIXME(mriedem): This is bug 1852610 where the compute service is + # deleted but the resource provider is not because there are still + # migration-based allocations against the source node provider. + resp = self.placement_api.get('/resource_providers/%s' % host1_rp_uuid) + self.assertEqual(200, resp.status) + self.assertFlavorMatchesUsage(host1_rp_uuid, flavor) + # Now try to confirm the migration. + # FIXME(mriedem): This will fail until bug 1852610 is fixed and the + # source compute service delete is blocked while there is an + # in-progress migration involving the node. + self.assertNotIn('ComputeHostNotFound', self.stdlog.logger.output) + self.api.post_server_action(server['id'], {'confirmResize': None}) + self._wait_for_state_change(self.api, server, 'ERROR') + self.assertIn('ComputeHostNotFound', self.stdlog.logger.output)