Browse source

Add functional recreate test for bug 1829479 and bug 1817833

Change I7b8622b178d5043ed1556d7bdceaf60f47e5ac80 started deleting
the associated resource provider when a compute service is deleted.
However, the delete_resource_provider cascade=True logic only looks
for instances on the given compute service host being deleted which
will miss (1) allocations remaining from evacuated servers and
(2) unconfirmed migrations.

Attempting to delete the resource provider results in a
ResourceProviderInUse error, which delete_resource_provider ignores
for legacy reasons. This results in the compute service being
deleted but the resource provider being orphaned. What's more,
attempting to restart the now-deleted compute service will fail
because nova-compute will try to create a new resource provider
with a new uuid but with the same name (based on the hypervisor
hostname). That failure is actually reported in bug 1817833.

NOTE(mriedem): Note that in this backport a simple version of
assertFlavorMatchesUsage is added since the original version from
change If6aa37d9b6b48791e070799ab026c816fda4441c is not in Rocky.

Change-Id: I69f52f1282c8361c9cdf90a523f3612139cb8423
Related-Bug: #1829479
Related-Bug: #1817833
(cherry picked from commit 2629d65fbc)
(cherry picked from commit b18e42d20b)
Matt Riedemann 1 jaar geleden
2 gewijzigde bestanden met toevoegingen van 61 en 0 verwijderingen
  1. +4
  2. +57

+ 4
- 0
nova/tests/functional/ Bestand weergeven

@@ -509,6 +509,10 @@ class ProviderUsageBaseTestCase(test.TestCase, InstanceHelperMixin):
self.assertEqual(old_flavor['disk'] + new_flavor['disk'],

def assertFlavorMatchesUsage(self, rp_uuid, flavor):
    """Assert that the usages reported for a resource provider match
    the resource amounts defined by the given flavor.

    :param rp_uuid: UUID of the resource provider to check.
    :param flavor: Flavor dict whose vcpu/ram/disk values are the
        expected usage on the provider.
    """
    provider_usages = self._get_provider_usages(rp_uuid)
    self.assertFlavorMatchesAllocation(flavor, provider_usages)

def get_migration_uuid_for_instance(self, instance_uuid):
# NOTE(danms): This is too much introspection for a test like this, but
# we can't see the migration uuid from the API, so we just encapsulate

+ 57
- 0
nova/tests/functional/wsgi/ Bestand weergeven

@@ -116,3 +116,60 @@ class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase):
# and allocation information.
resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
self.assertEqual(404, resp.status)

def test_evacuate_then_delete_compute_service(self):
    """Tests a scenario where a server is created on a host, the host
    goes down, the server is evacuated to another host, and then the
    source host compute service is deleted. After that the deleted
    compute service is restarted. Related placement resources are checked
    throughout.
    """
    # Create our source host that we will evacuate *from* later.
    host1 = self._start_compute('host1')
    # Create a server which will go on host1 since it is the only host.
    flavor = self.api.get_flavors()[0]
    server = self._boot_and_check_allocations(flavor, 'host1')
    # Get the compute service record for host1 so we can manage it.
    service = self.admin_api.get_services(
        binary='nova-compute', host='host1')[0]
    # Get the corresponding resource provider uuid for host1.
    rp_uuid = self._get_provider_uuid_by_host(service['host'])
    # Make sure there is a resource provider for that compute node based
    # on the uuid.
    resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
    self.assertEqual(200, resp.status)
    # Down the compute service for host1 so we can evacuate from it.
    self.admin_api.put_service(service['id'], {'forced_down': True})
    # Start another host and trigger the server evacuate to that host.
    self._start_compute('host2')
    self.admin_api.post_server_action(server['id'], {'evacuate': {}})
    # The host does not change until after the status is changed to ACTIVE
    # so wait for both parameters.
    self._wait_for_server_parameter(
        self.admin_api, server, {'status': 'ACTIVE',
                                 'OS-EXT-SRV-ATTR:host': 'host2'})
    # Delete the compute service for host1 and check the related
    # placement resources for that host.
    self.admin_api.api_delete('/os-services/%s' % service['id'])
    # Make sure the service is gone.
    services = self.admin_api.get_services(
        binary='nova-compute', host='host1')
    self.assertEqual(0, len(services), services)
    # FIXME(mriedem): This is bug 1829479 where the compute service is
    # deleted but the resource provider is not because there are still
    # allocations against the provider from the evacuated server.
    resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
    self.assertEqual(200, resp.status)
    self.assertFlavorMatchesUsage(rp_uuid, flavor)
    # Try to restart the host1 compute service to create a new resource
    # provider.
    self.restart_compute_service(host1)
    # FIXME(mriedem): This is bug 1817833 where restarting the now-deleted
    # compute service attempts to create a new resource provider with a
    # new uuid but the same name which results in a conflict. The service
    # does not die, however, because _update_available_resource_for_node
    # catches and logs but does not re-raise the error.
    log_output = self.stdlog.logger.output
    self.assertIn('Error updating resources for node host1.', log_output)
    self.assertIn('Failed to create resource provider host1', log_output)