diff --git a/api-ref/source/os-services.inc b/api-ref/source/os-services.inc index 298dc0750e3e..0f2c5cbc2711 100644 --- a/api-ref/source/os-services.inc +++ b/api-ref/source/os-services.inc @@ -318,6 +318,10 @@ Delete Compute Service Deletes a service. If it's a ``nova-compute`` service, then the corresponding host will be removed from all the host aggregates as well. +Attempts to delete a ``nova-compute`` service which is still hosting instances +will result in a 409 HTTPConflict response. The instances will need to be +migrated or deleted before a compute service can be deleted. + .. important:: Be sure to stop the actual ``nova-compute`` process on the physical host *before* deleting the service with this API. Failing to do so can lead to the running service re-creating @@ -325,7 +329,8 @@ corresponding host will be removed from all the host aggregates as well. Normal response codes: 204 -Error response codes: badRequest(400), unauthorized(401), forbidden(403), itemNotFound(404) +Error response codes: badRequest(400), unauthorized(401), forbidden(403), +itemNotFound(404), conflict(409) Request ------- diff --git a/nova/api/openstack/compute/services.py b/nova/api/openstack/compute/services.py index 02816f3cca7d..22b006e71960 100644 --- a/nova/api/openstack/compute/services.py +++ b/nova/api/openstack/compute/services.py @@ -24,6 +24,7 @@ from nova import availability_zones from nova import compute from nova import exception from nova.i18n import _ +from nova import objects from nova.policies import services as services_policies from nova import servicegroup from nova import utils @@ -189,7 +190,7 @@ class ServiceController(wsgi.Controller): return action(body, context) @wsgi.response(204) - @wsgi.expected_errors((400, 404)) + @wsgi.expected_errors((400, 404, 409)) def delete(self, req, id): """Deletes the specified service.""" context = req.environ['nova.context'] @@ -211,6 +212,21 @@ class ServiceController(wsgi.Controller): service = self.host_api.service_get_by_id(context, id) # remove the service from all the aggregates in which it's included if service.binary == 'nova-compute': + # Check to see if there are any instances on this compute host + # because if there are, we need to block the service (and + # related compute_nodes record) delete since it will impact + # resource accounting in Placement and orphan the compute node + # resource provider. + # TODO(mriedem): Use a COUNT SQL query-based function instead + # of InstanceList.get_uuids_by_host for performance. + instance_uuids = objects.InstanceList.get_uuids_by_host( + context, service['host']) + if instance_uuids: + raise webob.exc.HTTPConflict( + explanation=_('Unable to delete compute service that ' + 'is hosting instances. Migrate or ' + 'delete the instances first.')) + aggrs = self.aggregate_api.get_aggregates_by_host(context, service.host) for ag in aggrs: diff --git a/nova/tests/functional/wsgi/test_services.py b/nova/tests/functional/wsgi/test_services.py index 3db9e5290d5c..6e944c458383 100644 --- a/nova/tests/functional/wsgi/test_services.py +++ b/nova/tests/functional/wsgi/test_services.py @@ -10,9 +10,12 @@ # License for the specific language governing permissions and limitations # under the License. +import six + from nova import context as nova_context from nova import objects from nova import rc_fields +from nova.tests.functional.api import client as api_client from nova.tests.functional import test_servers @@ -73,12 +76,24 @@ class TestServicesAPI(test_servers.ProviderUsageBaseTestCase): # update_available_resource periodic task. self.admin_api.put_service(service['id'], {'forced_down': True}) compute.stop() - # FIXME(mriedem): This is bug 1763183 where the compute node has - # an instance running on it but we allow you to delete the service - # and compute node anyway, which will affect the allocations for the - # instance and orphans the compute node resource provider in Placement. - # Once the bug is fixed, this should fail until the instance is either - # migrated or deleted. + # The first attempt should fail since there is an instance on the + # compute host. + ex = self.assertRaises(api_client.OpenStackApiException, + self.admin_api.api_delete, + '/os-services/%s' % service['id']) + self.assertIn('Unable to delete compute service that is hosting ' + 'instances.', six.text_type(ex)) + self.assertEqual(409, ex.response.status_code) + + # Now delete the instance and wait for it to be gone. + # Note that we can't use self._delete_and_check_allocations here + # because of bug 1679750 where allocations are not deleted when + # an instance is deleted and the compute service it's running on + # is down. + self.api.delete_server(server['id']) + self._wait_until_deleted(server) + + # Now we can delete the service. self.admin_api.api_delete('/os-services/%s' % service['id']) # Make sure the service is deleted. diff --git a/releasenotes/notes/bug-1763183-service-delete-with-instances-d7c5c47e4ce31239.yaml b/releasenotes/notes/bug-1763183-service-delete-with-instances-d7c5c47e4ce31239.yaml new file mode 100644 index 000000000000..72af00754795 --- /dev/null +++ b/releasenotes/notes/bug-1763183-service-delete-with-instances-d7c5c47e4ce31239.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + The ``DELETE /os-services/{service_id}`` compute API will now return a + ``409 HTTPConflict`` response when trying to delete a ``nova-compute`` + service which is still hosting instances. This is because doing so would + orphan the compute node resource provider in the placement service on + which those instances have resource allocations, which affects scheduling. + See https://bugs.launchpad.net/nova/+bug/1763183 for more details.