Stop instance build if BuildRequest deleted

As part of the cellsv2 instance boot process there is a check that the
BuildRequest still exists before sending the request to a compute. This
is how an instance delete for an unscheduled booting instance will stop
the boot from proceeding.

However, if there are some older nova-api services then the BuildRequest
may never have existed, so its non-existence does not indicate that a
delete occurred. A service version check accounts for this and lets the
build proceed as normal. In that case the delete code will also need to
account for the lower service version and clean up the instance from the
cell.

Change-Id: Id65ab1911071608a739bea96e071939baa388493
Partially-implements: bp add-buildrequest-obj
Partially-implements: bp cells-scheduling-interaction
This commit is contained in:
Andrew Laski 2016-06-21 17:13:02 -04:00
parent d9d3b16527
commit 6ce01a655b
3 changed files with 86 additions and 18 deletions

View File

@ -383,22 +383,31 @@ class ComputeTaskManager(base.Base):
self.scheduler_client)
def _destroy_build_request(self, context, instance):
try:
build_request = objects.BuildRequest.get_by_instance_uuid(context,
instance.uuid)
except exception.BuildRequestNotFound:
LOG.debug('BuildRequest not found for instance %(uuid)s, likely '
'due to an older nova-api service running.',
{'uuid': instance.uuid})
return
# The BuildRequest needs to be stored until the instance is mapped to
# an instance table. At that point it will never be used again and
# should be deleted.
# TODO(alaski): Sync API updates to the build_request to the
# instance before it is destroyed. Right now only locked_by can
# be updated before this is destroyed.
build_request.destroy()
try:
build_request = objects.BuildRequest.get_by_instance_uuid(context,
instance.uuid)
# TODO(alaski): Sync API updates of the build_request to the
# instance before it is destroyed. Right now only locked_by can
# be updated before this is destroyed.
build_request.destroy()
except exception.BuildRequestNotFound:
with excutils.save_and_reraise_exception() as exc_ctxt:
service_version = objects.Service.get_minimum_version(
context, 'nova-api')
if service_version >= 12:
# A BuildRequest was created during the boot process, the
# NotFound exception indicates a delete happened which
# should abort the boot.
pass
else:
LOG.debug('BuildRequest not found for instance %(uuid)s, '
'likely due to an older nova-api service '
'running.', {'uuid': instance.uuid})
exc_ctxt.reraise = False
return
def _populate_instance_mapping(self, context, instance, host):
try:
@ -412,6 +421,7 @@ class ComputeTaskManager(base.Base):
LOG.debug('Instance was not mapped to a cell, likely due '
'to an older nova-api service running.',
instance=instance)
return None
else:
try:
host_mapping = objects.HostMapping.get_by_host(context,
@ -423,9 +433,11 @@ class ComputeTaskManager(base.Base):
# Eventually this will indicate a failure to properly map a
# host to a cell and we may want to reschedule.
inst_mapping.destroy()
return None
else:
inst_mapping.cell_mapping = host_mapping.cell_mapping
inst_mapping.save()
return inst_mapping
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
@ -482,8 +494,16 @@ class ComputeTaskManager(base.Base):
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
context, instance.uuid)
self._populate_instance_mapping(context, instance, host)
self._destroy_build_request(context, instance)
inst_mapping = self._populate_instance_mapping(context, instance,
host)
try:
self._destroy_build_request(context, instance)
except exception.BuildRequestNotFound:
# This indicates an instance delete has been requested in the
# API. Stop the build and cleanup the instance_mapping.
if inst_mapping:
inst_mapping.destroy()
return
self.compute_rpcapi.build_and_run_instance(context,
instance=instance, host=host['host'], image=image,

View File

@ -82,7 +82,8 @@ SERVICE_VERSION_HISTORY = (
# Version 11: Compute RPC version 4.12
{'compute_rpc': '4.12'},
# Version 12: The network APIs and compute manager support a NetworkRequest
# object where the network_id value is 'auto' or 'none'.
# object where the network_id value is 'auto' or 'none'. BuildRequest
# objects are populated by nova-api during instance boot.
{'compute_rpc': '4.12'},
# Version 13: Compute RPC version 4.13
{'compute_rpc': '4.13'},

View File

@ -782,8 +782,8 @@ class _BaseTaskTestCase(object):
'select_destinations')
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_set_vm_state_and_notify', new=mock.MagicMock())
def test_build_instances_build_request_not_found(self, mock_select_dests,
mock_build_req_get):
def test_build_instances_build_request_not_found_older_api(self,
mock_select_dests, mock_build_req_get):
mock_select_dests.return_value = [
{'host': 'host1', 'nodename': 'node1', 'limits': []},
@ -817,6 +817,53 @@ class _BaseTaskTestCase(object):
do_test()
# Test: when the BuildRequest lookup raises BuildRequestNotFound while the
# AllServicesCurrent fixture is active, build_instances must not call
# build_and_run_instance and must destroy the instance mapping returned by
# _populate_instance_mapping.
# Instance.refresh is swapped for a MagicMock for the duration of the test.
@mock.patch.object(objects.Instance, 'refresh', new=mock.MagicMock())
# Every BuildRequest lookup raises BuildRequestNotFound.
@mock.patch.object(objects.BuildRequest, 'get_by_instance_uuid',
side_effect=exc.BuildRequestNotFound(uuid='fake'))
# The scheduler's select_destinations is mocked; its return value is set in
# the test body below.
@mock.patch.object(scheduler_client.SchedulerClient,
'select_destinations')
# _set_vm_state_and_notify is swapped for a MagicMock.
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_set_vm_state_and_notify', new=mock.MagicMock())
def test_build_instances_build_request_not_found_because_delete(self,
mock_select_dests, mock_build_req_get):
# Two destinations are returned, one per instance built below.
mock_select_dests.return_value = [
{'host': 'host1', 'nodename': 'node1', 'limits': []},
{'host': 'host2', 'nodename': 'node2', 'limits': []}]
num_instances = 2
instances = [fake_instance.fake_instance_obj(self.context)
for i in range(num_instances)]
image = {'fake-data': 'should_pass_silently'}
# build_instances() is a cast, we need to wait for it to complete
self.useFixture(cast_as_call.CastAsCall(self.stubs))
# Ensure service is high enough to run the new code path
self.useFixture(fixtures.AllServicesCurrent())
# Stand-in for the instance mapping; its destroy() call is asserted below.
inst_map_mock = mock.MagicMock()
# Patch the compute RPC call and the mapping helper on the conductor manager
# so the test can observe whether the build was sent to a compute and hand
# back the mock mapping.
@mock.patch.object(self.conductor_manager.compute_rpcapi,
'build_and_run_instance')
@mock.patch.object(self.conductor_manager,
'_populate_instance_mapping', return_value=inst_map_mock)
def do_test(mock_pop_inst_map, mock_build_and_run):
self.conductor.build_instances(
context=self.context,
instances=instances,
image=image,
filter_properties={},
admin_password='admin_password',
injected_files='injected_files',
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
# The build must not have been sent to a compute host ...
self.assertFalse(mock_build_and_run.called)
# ... and the instance mapping must have been destroyed.
self.assertTrue(inst_map_mock.destroy.called)
do_test()
def test_unshelve_instance_on_host(self):
instance = self._create_fake_instance_obj()
instance.vm_state = vm_states.SHELVED