From 6eda7409fff75449c97843b2d6ead0b3267a1099 Mon Sep 17 00:00:00 2001
From: Matt Riedemann
Date: Thu, 6 Jun 2019 13:41:09 -0400
Subject: [PATCH] Add functional recreate test for bug 1829479 and bug 1817833

Change I7b8622b178d5043ed1556d7bdceaf60f47e5ac80 started deleting
the associated resource provider when a compute service is deleted.
However, the delete_resource_provider cascade=True logic only looks
for instances on the given compute service host being deleted, which
will miss (1) allocations remaining from evacuated servers and
(2) unconfirmed migrations.

Attempting to delete the resource provider results in a
ResourceProviderInUse error which delete_resource_provider ignores
for legacy reasons. This results in the compute service being
deleted but the resource provider being orphaned. What's more,
attempting to restart the now-deleted compute service will fail
because nova-compute will try to create a new resource provider
with a new uuid but with the same name (based on the hypervisor
hostname). That failure is actually reported in bug 1817833.

NOTE(mriedem): Note that in this backport a simple version of
assertFlavorMatchesUsage is added since the original version
from change If6aa37d9b6b48791e070799ab026c816fda4441c is not
in Rocky.

Change-Id: I69f52f1282c8361c9cdf90a523f3612139cb8423
Related-Bug: #1829479
Related-Bug: #1817833
(cherry picked from commit 2629d65fbc15d8698f98117e0d6072810f70da03)
(cherry picked from commit b18e42d20bd7d341e713292bdb179ae8e5530d33)
---
 nova/tests/functional/integrated_helpers.py |  4 ++
 nova/tests/functional/wsgi/test_services.py | 57 +++++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py
index cd7e98707eaf..7c12269495be 100644
--- a/nova/tests/functional/integrated_helpers.py
+++ b/nova/tests/functional/integrated_helpers.py
@@ -509,6 +509,10 @@ class ProviderUsageBaseTestCase(test.TestCase, InstanceHelperMixin):
         self.assertEqual(old_flavor['disk'] + new_flavor['disk'],
                          allocation['DISK_GB'])
 
+    def assertFlavorMatchesUsage(self, rp_uuid, flavor):
+        usages = self._get_provider_usages(rp_uuid)
+        self.assertFlavorMatchesAllocation(flavor, usages)
+
     def get_migration_uuid_for_instance(self, instance_uuid):
         # NOTE(danms): This is too much introspection for a test like this, but
         # we can't see the migration uuid from the API, so we just encapsulate
diff --git a/nova/tests/functional/wsgi/test_services.py b/nova/tests/functional/wsgi/test_services.py
index 9a6f00a5898d..5d97bcc32afa 100644
--- a/nova/tests/functional/wsgi/test_services.py
+++ b/nova/tests/functional/wsgi/test_services.py
@@ -116,3 +116,60 @@ class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase):
         # and allocation information.
         resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
         self.assertEqual(404, resp.status)
+
+    def test_evacuate_then_delete_compute_service(self):
+        """Tests a scenario where a server is created on a host, the host
+        goes down, the server is evacuated to another host, and then the
+        source host compute service is deleted. After that the deleted
+        compute service is restarted. Related placement resources are checked
+        throughout.
+        """
+        # Create our source host that we will evacuate *from* later.
+        host1 = self._start_compute('host1')
+        # Create a server which will go on host1 since it is the only host.
+        flavor = self.api.get_flavors()[0]
+        server = self._boot_and_check_allocations(flavor, 'host1')
+        # Get the compute service record for host1 so we can manage it.
+        service = self.admin_api.get_services(
+            binary='nova-compute', host='host1')[0]
+        # Get the corresponding resource provider uuid for host1.
+        rp_uuid = self._get_provider_uuid_by_host(service['host'])
+        # Make sure placement has a resource provider with that uuid before
+        # the service is deleted.
+        resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
+        self.assertEqual(200, resp.status)
+        # Force down and stop the host1 service so we can evacuate from it.
+        self.admin_api.put_service(service['id'], {'forced_down': True})
+        host1.stop()
+        # Start another host and trigger the server evacuate to that host.
+        self._start_compute('host2')
+        self.admin_api.post_server_action(server['id'], {'evacuate': {}})
+        # The host does not change until after the status is changed to ACTIVE
+        # so wait for both parameters.
+        self._wait_for_server_parameter(
+            self.admin_api, server, {'status': 'ACTIVE',
+                                     'OS-EXT-SRV-ATTR:host': 'host2'})
+        # Delete the compute service for host1 and check the related
+        # placement resources for that host.
+        self.admin_api.api_delete('/os-services/%s' % service['id'])
+        # Make sure the service is gone.
+        services = self.admin_api.get_services(
+            binary='nova-compute', host='host1')
+        self.assertEqual(0, len(services), services)
+        # FIXME(mriedem): This is bug 1829479 where the compute service is
+        # deleted but the resource provider is not because there are still
+        # allocations against the provider from the evacuated server.
+        resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
+        self.assertEqual(200, resp.status)
+        self.assertFlavorMatchesUsage(rp_uuid, flavor)
+        # Try to restart the host1 compute service to create a new resource
+        # provider.
+        self.restart_compute_service(host1)
+        # FIXME(mriedem): This is bug 1817833 where restarting the now-deleted
+        # compute service attempts to create a new resource provider with a
+        # new uuid but the same name which results in a conflict. The service
+        # does not die, however, because _update_available_resource_for_node
+        # catches and logs but does not re-raise the error.
+        log_output = self.stdlog.logger.output
+        self.assertIn('Error updating resources for node host1.', log_output)
+        self.assertIn('Failed to create resource provider host1', log_output)