Merge "api: Reject requests to force up computes when `done` evacuation records exist"

This commit is contained in:
Zuul 2021-04-15 09:44:19 +00:00 committed by Gerrit Code Review
commit 5eab13030b
4 changed files with 40 additions and 12 deletions

View File

@ -189,6 +189,9 @@ class ServiceController(wsgi.Controller):
host = body['host'] host = body['host']
binary = body['binary'] binary = body['binary']
if binary == 'nova-compute' and forced_down is False:
self._check_for_evacuations(context, host)
ret_value = {'service': {'host': host, ret_value = {'service': {'host': host,
'binary': binary, 'binary': binary,
'forced_down': forced_down}} 'forced_down': forced_down}}
@ -225,6 +228,25 @@ class ServiceController(wsgi.Controller):
return action(body, context) return action(body, context)
def _check_for_evacuations(self, context, hostname):
# NOTE(lyarwood): When forcing a compute service back up ensure that
# there are no evacuation migration records against this host as the
# source that are marked as done, suggesting that the compute service
# hasn't restarted and moved such records to a completed state.
filters = {
'source_compute': hostname,
'status': 'done',
'migration_type': objects.fields.MigrationType.EVACUATION,
}
if any(objects.MigrationList.get_by_filters(context, filters)):
msg = _("Unable to force up host %(host)s as `done` evacuation "
"migration records remain associated with the host. "
"Ensure the compute service has been restarted, "
"allowing these records to move to `completed` before "
"retrying this request.") % {'host': hostname}
# TODO(lyarwood): Move to 409 HTTPConflict under a new microversion
raise webob.exc.HTTPBadRequest(explanation=msg)
@wsgi.response(204) @wsgi.response(204)
@wsgi.expected_errors((400, 404, 409)) @wsgi.expected_errors((400, 404, 409))
def delete(self, req, id): def delete(self, req, id):
@ -446,6 +468,8 @@ class ServiceController(wsgi.Controller):
if 'forced_down' in body: if 'forced_down' in body:
service.forced_down = strutils.bool_from_string( service.forced_down = strutils.bool_from_string(
body['forced_down'], strict=True) body['forced_down'], strict=True)
if service.forced_down is False:
self._check_for_evacuations(context, service.host)
# Check to see if anything was actually updated since the schema does # Check to see if anything was actually updated since the schema does
# not define any required fields. # not define any required fields.

View File

@ -252,6 +252,7 @@ class TestInstanceNotificationSampleWithMultipleCompute(
services = self.admin_api.get_services(host='host2', services = self.admin_api.get_services(host='host2',
binary='nova-compute') binary='nova-compute')
service_id = services[0]['id'] service_id = services[0]['id']
self.compute2.stop()
self.admin_api.put_service(service_id, {'forced_down': True}) self.admin_api.put_service(service_id, {'forced_down': True})
evacuate = { evacuate = {
'evacuate': { 'evacuate': {
@ -272,6 +273,8 @@ class TestInstanceNotificationSampleWithMultipleCompute(
'reservation_id': server['reservation_id'], 'reservation_id': server['reservation_id'],
'uuid': server['id']}, 'uuid': server['id']},
actual=notifications[0]) actual=notifications[0])
self.compute2.start()
self._wait_for_migration_status(server, ['completed'])
self.admin_api.put_service(service_id, {'forced_down': False}) self.admin_api.put_service(service_id, {'forced_down': False})
def _test_live_migration_force_complete(self, server): def _test_live_migration_force_complete(self, server):

View File

@ -10,6 +10,7 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
from nova.tests.functional.api import client
from nova.tests.functional import integrated_helpers from nova.tests.functional import integrated_helpers
@ -61,16 +62,13 @@ class ForceUpWithDoneEvacuations(integrated_helpers._IntegratedTestBase):
expected_migration_status='done' expected_migration_status='done'
) )
# FIXME(lyarwood): This is bug #1922053, this shouldn't be allowed with # Assert that the request to force up the host is rejected
# `done` evacuation migration records still associated with the host. ex = self.assertRaises(
# Replace this with the following assertion once fixed: client.OpenStackApiException,
# ex = self.assertRaises( self._force_up_compute,
# client.OpenStackApiException, 'compute',
# self._force_up_compute, )
# 'compute', self.assertEqual(400, ex.response.status_code)
# )
# self.assertEqual(400, ex.response.status_code)
self._force_up_compute('compute')
# Assert that the evacuation migration record remains `done` # Assert that the evacuation migration record remains `done`
self._wait_for_migration_status(server, ["done"]) self._wait_for_migration_status(server, ["done"])
@ -82,6 +80,9 @@ class ForceUpWithDoneEvacuations(integrated_helpers._IntegratedTestBase):
# Assert that the evacuation migration record is now `completed` # Assert that the evacuation migration record is now `completed`
self._wait_for_migration_status(server, ["completed"]) self._wait_for_migration_status(server, ["completed"])
# Assert that we can now force up the host
self._force_up_compute('compute')
class ForceUpWithDoneEvacuationsv252(ForceUpWithDoneEvacuations): class ForceUpWithDoneEvacuationsv252(ForceUpWithDoneEvacuations):

View File

@ -7181,9 +7181,9 @@ class ServerMoveWithPortResourceRequestTest(
self._assert_pci_request_pf_device_name(server, 'host2-ens2') self._assert_pci_request_pf_device_name(server, 'host2-ens2')
# recover source compute # recover source compute
self.compute1 = self.restart_compute_service(self.compute1)
self.admin_api.put_service( self.admin_api.put_service(
self.compute1_service_id, {'forced_down': 'false'}) self.compute1_service_id, {'forced_down': 'false'})
self.compute1 = self.restart_compute_service(self.compute1)
# check that source allocation is cleaned up and the dest allocation # check that source allocation is cleaned up and the dest allocation
# and the port bindings are not touched. # and the port bindings are not touched.
@ -7226,9 +7226,9 @@ class ServerMoveWithPortResourceRequestTest(
qos_normal_port, qos_sriov_port, self.flavor_with_group_policy) qos_normal_port, qos_sriov_port, self.flavor_with_group_policy)
# recover source compute # recover source compute
self.compute1 = self.restart_compute_service(self.compute1)
self.admin_api.put_service( self.admin_api.put_service(
self.compute1_service_id, {'forced_down': 'false'}) self.compute1_service_id, {'forced_down': 'false'})
self.compute1 = self.restart_compute_service(self.compute1)
# check again that even after source host recovery the source # check again that even after source host recovery the source
# allocation is intact # allocation is intact