# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import os_resource_classes as orc
import os_traits
import six

from nova import context as nova_context
from nova import exception
from nova import objects
from nova.tests.functional.api import client as api_client
from nova.tests.functional import integrated_helpers
from nova.tests.unit.image import fake as fake_image
from nova import utils


class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase):
    compute_driver = 'fake.SmallFakeDriver'

    def test_compute_service_delete_ensure_related_cleanup(self):
        """Tests deleting a compute service and the related cleanup
        associated with it, such as the compute_nodes table entry, removing
        the host from any aggregates, the host mapping in the API DB and
        the associated resource provider in Placement.
        """
        compute = self._start_compute('host1')
        # Make sure our compute host is represented as expected.
        services = self.admin_api.get_services(binary='nova-compute')
        self.assertEqual(1, len(services))
        service = services[0]

        # Now create a host aggregate and add our host to it.
        aggregate = self.admin_api.post_aggregate(
            {'aggregate': {'name': 'agg1'}})
        self.admin_api.add_host_to_aggregate(aggregate['id'],
                                             service['host'])
        # Make sure the host is in the aggregate.
        aggregate = self.admin_api.api_get(
            '/os-aggregates/%s' % aggregate['id']).body['aggregate']
        self.assertEqual([service['host']], aggregate['hosts'])

        rp_uuid = self._get_provider_uuid_by_host(service['host'])

        # We'll know there is a host mapping implicitly if os-hypervisors
        # returned something in _get_provider_uuid_by_host, but let's also
        # make sure the host mapping is there like we expect.
        ctxt = nova_context.get_admin_context()
        objects.HostMapping.get_by_host(ctxt, service['host'])

        # Make sure there is a resource provider for that compute node
        # based on the uuid.
        resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
        self.assertEqual(200, resp.status)

        # Make sure the resource provider has inventory.
        inventories = self._get_provider_inventory(rp_uuid)
        # Expect a minimal set of inventory for the fake virt driver.
        for resource_class in [orc.VCPU, orc.MEMORY_MB, orc.DISK_GB]:
            self.assertIn(resource_class, inventories)

        # Now create a server so that the resource provider has some
        # allocation records.
        flavor = self.api.get_flavors()[0]
        server = self._boot_and_check_allocations(flavor, service['host'])

        # Now the fun part, delete the compute service and make sure
        # related resources are cleaned up, like the compute node, host
        # mapping, and resource provider. We have to first stop the compute
        # service so it doesn't recreate the compute node during the
        # update_available_resource periodic task.
        self.admin_api.put_service(service['id'], {'forced_down': True})
        compute.stop()
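        # NOTE: PUT /os-services/{service_id} with a body like
        # {'forced_down': True} is the microversion 2.53+ form of the
        # os-services API; this test class runs with the latest
        # microversion so the id-based update works here. The
        # ComputeStatusFilterTest211 class below exercises the pre-2.53
        # host/binary form of the same API.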

        # The first attempt should fail since there is an instance on the
        # compute host.
        ex = self.assertRaises(api_client.OpenStackApiException,
                               self.admin_api.api_delete,
                               '/os-services/%s' % service['id'])
        self.assertIn('Unable to delete compute service that is hosting '
                      'instances.', six.text_type(ex))
        self.assertEqual(409, ex.response.status_code)

        # Now delete the instance and wait for it to be gone.
        self._delete_and_check_allocations(server)

        # Now we can delete the service.
        self.admin_api.api_delete('/os-services/%s' % service['id'])

        # Make sure the service is deleted.
        services = self.admin_api.get_services(binary='nova-compute')
        self.assertEqual(0, len(services))

        # Make sure the host was removed from the aggregate.
        aggregate = self.admin_api.api_get(
            '/os-aggregates/%s' % aggregate['id']).body['aggregate']
        self.assertEqual([], aggregate['hosts'])

        # Trying to get the hypervisor should result in a 404.
        self.admin_api.api_get(
            'os-hypervisors?hypervisor_hostname_pattern=%s' %
            service['host'], check_response_status=[404])

        # The host mapping should also be gone.
        self.assertRaises(exception.HostMappingNotFound,
                          objects.HostMapping.get_by_host, ctxt,
                          service['host'])

        # And finally, the resource provider should also be gone. The API
        # will perform a cascading delete of the resource provider
        # inventory and allocation information.
        resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
        self.assertEqual(404, resp.status)

    def test_evacuate_then_delete_compute_service(self):
        """Tests a scenario where a server is created on a host, the host
        goes down, the server is evacuated to another host, and then the
        source host compute service is deleted. After that the deleted
        compute service is restarted. Related placement resources are
        checked throughout.
        """
        # Create our source host that we will evacuate *from* later.
        host1 = self._start_compute('host1')

        # Create a server which will go on host1 since it is the only host.
        flavor = self.api.get_flavors()[0]
        server = self._boot_and_check_allocations(flavor, 'host1')

        # Get the compute service record for host1 so we can manage it.
        service = self.admin_api.get_services(
            binary='nova-compute', host='host1')[0]

        # Get the corresponding resource provider uuid for host1.
        rp_uuid = self._get_provider_uuid_by_host(service['host'])

        # Make sure there is a resource provider for that compute node
        # based on the uuid.
        resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
        self.assertEqual(200, resp.status)

        # Down the compute service for host1 so we can evacuate from it.
        self.admin_api.put_service(service['id'], {'forced_down': True})
        host1.stop()

        # Start another host and trigger the server evacuate to that host.
        self._start_compute('host2')
        self.admin_api.post_server_action(server['id'], {'evacuate': {}})
        # The host does not change until after the status is changed to
        # ACTIVE so wait for both parameters.
        self._wait_for_server_parameter(
            self.admin_api, server,
            {'status': 'ACTIVE', 'OS-EXT-SRV-ATTR:host': 'host2'})

        # Delete the compute service for host1 and check the related
        # placement resources for that host.
        self.admin_api.api_delete('/os-services/%s' % service['id'])

        # Make sure the service is gone.
        services = self.admin_api.get_services(
            binary='nova-compute', host='host1')
        self.assertEqual(0, len(services), services)

        # FIXME(mriedem): This is bug 1829479 where the compute service is
        # deleted but the resource provider is not because there are still
        # allocations against the provider from the evacuated server.
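        # Placement refuses to delete a resource provider that still has
        # allocations against it (the delete returns a 409 conflict), which
        # is why the source host's provider survives the service delete.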
        resp = self.placement_api.get('/resource_providers/%s' % rp_uuid)
        self.assertEqual(200, resp.status)
        self.assertFlavorMatchesUsage(rp_uuid, flavor)

        # Try to restart the host1 compute service to create a new resource
        # provider.
        self.restart_compute_service(host1)
        # FIXME(mriedem): This is bug 1817833 where restarting the
        # now-deleted compute service attempts to create a new resource
        # provider with a new uuid but the same name which results in a
        # conflict. The service does not die, however, because
        # _update_available_resource_for_node catches and logs but does not
        # re-raise the error.
        log_output = self.stdlog.logger.output
        self.assertIn('Error updating resources for node host1.',
                      log_output)
        self.assertIn('Failed to create resource provider host1',
                      log_output)

    def test_migrate_confirm_after_deleted_source_compute(self):
        """Tests a scenario where a server is cold migrated and while in
        VERIFY_RESIZE status the admin attempts to delete the source
        compute and then the user tries to confirm the resize.
        """
        # Start a compute service and create a server there.
        self._start_compute('host1')
        host1_rp_uuid = self._get_provider_uuid_by_host('host1')
        flavor = self.api.get_flavors()[0]
        server = self._boot_and_check_allocations(flavor, 'host1')

        # Start a second compute service so we can cold migrate there.
        self._start_compute('host2')
        host2_rp_uuid = self._get_provider_uuid_by_host('host2')

        # Cold migrate the server to host2.
        self._migrate_and_check_allocations(
            server, flavor, host1_rp_uuid, host2_rp_uuid)

        # Delete the source compute service.
        service = self.admin_api.get_services(
            binary='nova-compute', host='host1')[0]
        self.admin_api.api_delete('/os-services/%s' % service['id'])
        # FIXME(mriedem): This is bug 1852610 where the compute service is
        # deleted but the resource provider is not because there are still
        # migration-based allocations against the source node provider.
        resp = self.placement_api.get(
            '/resource_providers/%s' % host1_rp_uuid)
        self.assertEqual(200, resp.status)
        self.assertFlavorMatchesUsage(host1_rp_uuid, flavor)

        # Now try to confirm the migration.
        # FIXME(mriedem): This will fail until bug 1852610 is fixed and the
        # source compute service delete is blocked while there is an
        # in-progress migration involving the node.
        self.assertNotIn('ComputeHostNotFound', self.stdlog.logger.output)
        self.api.post_server_action(server['id'], {'confirmResize': None})
        self._wait_for_state_change(self.api, server, 'ERROR')
        self.assertIn('ComputeHostNotFound', self.stdlog.logger.output)


class ComputeStatusFilterTest(integrated_helpers.ProviderUsageBaseTestCase):
    """Tests the API, compute service and Placement interaction with the
    COMPUTE_STATUS_DISABLED trait when a compute service is enabled or
    disabled. This version of the test uses the 2.latest microversion for
    testing the 2.53+ behavior of the PUT /os-services/{service_id} API.
    """
    compute_driver = 'fake.SmallFakeDriver'

    def _update_service(self, service, disabled, forced_down=None):
        """Update the service using the 2.53 request schema.

        :param service: dict representing the service resource in the API
        :param disabled: True if the service should be disabled, False if
            the service should be enabled
        :param forced_down: Optionally change the forced_down value.
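
        Example request body with both fields set, mirroring what this
        method builds below::

            {"status": "disabled", "forced_down": true}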
""" status = 'disabled' if disabled else 'enabled' req = {'status': status} if forced_down is not None: req['forced_down'] = forced_down self.admin_api.put_service(service['id'], req) def test_compute_status_filter(self): """Tests the compute_status_filter placement request filter""" # Start a compute service so a compute node and resource provider is # created. compute = self._start_compute('host1') # Get the UUID of the resource provider that was created. rp_uuid = self._get_provider_uuid_by_host('host1') # Get the service from the compute API. services = self.admin_api.get_services(binary='nova-compute', host='host1') self.assertEqual(1, len(services)) service = services[0] # At this point, the service should be enabled and the # COMPUTE_STATUS_DISABLED trait should not be set on the # resource provider in placement. self.assertEqual('enabled', service['status']) rp_traits = self._get_provider_traits(rp_uuid) trait = os_traits.COMPUTE_STATUS_DISABLED self.assertNotIn(trait, rp_traits) # Now disable the compute service via the API. self._update_service(service, disabled=True) # The update to placement should be synchronous so check the provider # traits and COMPUTE_STATUS_DISABLED should be set. rp_traits = self._get_provider_traits(rp_uuid) self.assertIn(trait, rp_traits) # Try creating a server which should fail because nothing is available. networks = [{'port': self.neutron.port_1['id']}] server_req = self._build_minimal_create_server_request( self.api, 'test_compute_status_filter', image_uuid=fake_image.get_valid_image_id(), networks=networks) server = self.api.post_server({'server': server_req}) server = self._wait_for_state_change(self.api, server, 'ERROR') # There should be a NoValidHost fault recorded. self.assertIn('fault', server) self.assertIn('No valid host', server['fault']['message']) # Now enable the service and the trait should be gone. self._update_service(service, disabled=False) rp_traits = self._get_provider_traits(rp_uuid) self.assertNotIn(trait, rp_traits) # Try creating another server and it should be OK. server = self.api.post_server({'server': server_req}) self._wait_for_state_change(self.api, server, 'ACTIVE') # Stop, force-down and disable the service so the API cannot call # the compute service to sync the trait. compute.stop() self._update_service(service, disabled=True, forced_down=True) # The API should have logged a message about the service being down. self.assertIn('Compute service on host host1 is down. The ' 'COMPUTE_STATUS_DISABLED trait will be synchronized ' 'when the service is restarted.', self.stdlog.logger.output) # The trait should not be on the provider even though the node is # disabled. rp_traits = self._get_provider_traits(rp_uuid) self.assertNotIn(trait, rp_traits) # Restart the compute service which should sync and set the trait on # the provider in placement. self.restart_compute_service(compute) rp_traits = self._get_provider_traits(rp_uuid) self.assertIn(trait, rp_traits) class ComputeStatusFilterTest211(ComputeStatusFilterTest): """Extends ComputeStatusFilterTest and uses the 2.11 API for the legacy os-services disable/enable/force-down API behavior """ microversion = '2.11' def _update_service(self, service, disabled, forced_down=None): """Update the service using the 2.11 request schema. :param service: dict representing the service resource in the API :param disabled: True if the service should be disabled, False if the service should be enabled :param forced_down: Optionally change the forced_down value. 
""" # Before 2.53 the service is uniquely identified by host and binary. body = { 'host': service['host'], 'binary': service['binary'] } # Handle forced_down first if provided since the enable/disable # behavior in the API depends on it. if forced_down is not None: body['forced_down'] = forced_down self.admin_api.api_put('/os-services/force-down', body) if disabled: self.admin_api.api_put('/os-services/disable', body) else: self.admin_api.api_put('/os-services/enable', body) def _get_provider_uuid_by_host(self, host): # We have to temporarily mutate to 2.53 to get the hypervisor UUID. with utils.temporary_mutation(self.admin_api, microversion='2.53'): return super(ComputeStatusFilterTest211, self)._get_provider_uuid_by_host(host)