Merge "Sanity check instance mapping during scheduling" into stable/stein
This commit is contained in:
commit
051a51f521
|
@ -1301,8 +1301,37 @@ class ComputeTaskManager(base.Base):
|
|||
|
||||
updates = {'vm_state': vm_states.ERROR, 'task_state': None}
|
||||
for instance in instances_by_uuid.values():
|
||||
|
||||
inst_mapping = None
|
||||
try:
|
||||
# We don't need the cell0-targeted context here because the
|
||||
# instance mapping is in the API DB.
|
||||
inst_mapping = \
|
||||
objects.InstanceMapping.get_by_instance_uuid(
|
||||
context, instance.uuid)
|
||||
except exception.InstanceMappingNotFound:
|
||||
# The API created the instance mapping record so it should
|
||||
# definitely be here. Log an error but continue to create the
|
||||
# instance in the cell0 database.
|
||||
LOG.error('While burying instance in cell0, no instance '
|
||||
'mapping was found.', instance=instance)
|
||||
|
||||
# Perform a final sanity check that the instance is not mapped
|
||||
# to some other cell already because of maybe some crazy
|
||||
# clustered message queue weirdness.
|
||||
if inst_mapping and inst_mapping.cell_mapping is not None:
|
||||
LOG.error('When attempting to bury instance in cell0, the '
|
||||
'instance is already mapped to cell %s. Ignoring '
|
||||
'bury in cell0 attempt.',
|
||||
inst_mapping.cell_mapping.identity,
|
||||
instance=instance)
|
||||
continue
|
||||
|
||||
with obj_target_cell(instance, cell0) as cctxt:
|
||||
instance.create()
|
||||
if inst_mapping:
|
||||
inst_mapping.cell_mapping = cell0
|
||||
inst_mapping.save()
|
||||
|
||||
# Record an instance action with a failed event.
|
||||
self._create_instance_action_for_cell0(
|
||||
|
@ -1322,16 +1351,6 @@ class ComputeTaskManager(base.Base):
|
|||
self._set_vm_state_and_notify(
|
||||
cctxt, instance.uuid, 'build_instances', updates,
|
||||
exc, request_spec)
|
||||
try:
|
||||
# We don't need the cell0-targeted context here because the
|
||||
# instance mapping is in the API DB.
|
||||
inst_mapping = \
|
||||
objects.InstanceMapping.get_by_instance_uuid(
|
||||
context, instance.uuid)
|
||||
inst_mapping.cell_mapping = cell0
|
||||
inst_mapping.save()
|
||||
except exception.InstanceMappingNotFound:
|
||||
pass
|
||||
|
||||
for build_request in build_requests:
|
||||
try:
|
||||
|
@ -1542,13 +1561,8 @@ class ComputeTaskManager(base.Base):
|
|||
instance.tags = instance_tags if instance_tags \
|
||||
else objects.TagList()
|
||||
|
||||
# Update mapping for instance. Normally this check is guarded by
|
||||
# a try/except but if we're here we know that a newer nova-api
|
||||
# handled the build process and would have created the mapping
|
||||
inst_mapping = objects.InstanceMapping.get_by_instance_uuid(
|
||||
context, instance.uuid)
|
||||
inst_mapping.cell_mapping = cell
|
||||
inst_mapping.save()
|
||||
# Update mapping for instance.
|
||||
self._map_instance_to_cell(context, instance, cell)
|
||||
|
||||
if not self._delete_build_request(
|
||||
context, build_request, instance, cell, instance_bdms,
|
||||
|
@ -1576,6 +1590,37 @@ class ComputeTaskManager(base.Base):
|
|||
host=host.service_host, node=host.nodename,
|
||||
limits=host.limits, host_list=host_list)
|
||||
|
||||
@staticmethod
|
||||
def _map_instance_to_cell(context, instance, cell):
|
||||
"""Update the instance mapping to point at the given cell.
|
||||
|
||||
During initial scheduling once a host and cell is selected in which
|
||||
to build the instance this method is used to update the instance
|
||||
mapping to point at that cell.
|
||||
|
||||
:param context: nova auth RequestContext
|
||||
:param instance: Instance object being built
|
||||
:param cell: CellMapping representing the cell in which the instance
|
||||
was created and is being built.
|
||||
:returns: InstanceMapping object that was updated.
|
||||
"""
|
||||
inst_mapping = objects.InstanceMapping.get_by_instance_uuid(
|
||||
context, instance.uuid)
|
||||
# Perform a final sanity check that the instance is not mapped
|
||||
# to some other cell already because of maybe some crazy
|
||||
# clustered message queue weirdness.
|
||||
if inst_mapping.cell_mapping is not None:
|
||||
LOG.error('During scheduling instance is already mapped to '
|
||||
'another cell: %s. This should not happen and is an '
|
||||
'indication of bigger problems. If you see this you '
|
||||
'should report it to the nova team. Overwriting '
|
||||
'the mapping to point at cell %s.',
|
||||
inst_mapping.cell_mapping.identity, cell.identity,
|
||||
instance=instance)
|
||||
inst_mapping.cell_mapping = cell
|
||||
inst_mapping.save()
|
||||
return inst_mapping
|
||||
|
||||
def _cleanup_build_artifacts(self, context, exc, instances, build_requests,
|
||||
request_specs, block_device_mappings, tags,
|
||||
cell_mapping_cache):
|
||||
|
|
|
@ -2270,6 +2270,27 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
|
|||
self.params['request_specs'][0].requested_resources = []
|
||||
self._do_schedule_and_build_instances_test(self.params)
|
||||
|
||||
def test_map_instance_to_cell_already_mapped(self):
|
||||
"""Tests a scenario where an instance is already mapped to a cell
|
||||
during scheduling.
|
||||
"""
|
||||
build_request = self.params['build_requests'][0]
|
||||
instance = build_request.get_new_instance(self.ctxt)
|
||||
# Simulate MQ split brain craziness by updating the instance mapping
|
||||
# to point at cell0.
|
||||
inst_mapping = objects.InstanceMapping.get_by_instance_uuid(
|
||||
self.ctxt, instance.uuid)
|
||||
inst_mapping.cell_mapping = self.cell_mappings['cell0']
|
||||
inst_mapping.save()
|
||||
cell1 = self.cell_mappings['cell1']
|
||||
inst_mapping = self.conductor._map_instance_to_cell(
|
||||
self.ctxt, instance, cell1)
|
||||
# Assert that the instance mapping was updated to point at cell1 but
|
||||
# also that an error was logged.
|
||||
self.assertEqual(cell1.uuid, inst_mapping.cell_mapping.uuid)
|
||||
self.assertIn('During scheduling instance is already mapped to '
|
||||
'another cell', self.stdlog.logger.output)
|
||||
|
||||
@mock.patch('nova.objects.InstanceMapping.get_by_instance_uuid')
|
||||
def test_cleanup_build_artifacts(self, inst_map_get):
|
||||
"""Simple test to ensure the order of operations in the cleanup method
|
||||
|
@ -2367,6 +2388,17 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
|
|||
build_requests = objects.BuildRequestList.get_all(self.ctxt)
|
||||
instances = objects.InstanceList.get_all(self.ctxt)
|
||||
|
||||
# Verify instance mappings.
|
||||
inst_mappings = objects.InstanceMappingList.get_by_cell_id(
|
||||
self.ctxt, self.cell_mappings['cell0'].id)
|
||||
# bare_br is the only instance that has a mapping from setUp.
|
||||
self.assertEqual(1, len(inst_mappings))
|
||||
# Since we did not setup instance mappings for the other fake build
|
||||
# requests used in this test, we should see a message logged about
|
||||
# there being no instance mappings.
|
||||
self.assertIn('While burying instance in cell0, no instance mapping '
|
||||
'was found', self.stdlog.logger.output)
|
||||
|
||||
self.assertEqual(0, len(build_requests))
|
||||
self.assertEqual(4, len(instances))
|
||||
inst_states = {inst.uuid: (inst.deleted, inst.vm_state)
|
||||
|
@ -2464,6 +2496,32 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
|
|||
test.MatchType(context.RequestContext), inst.uuid,
|
||||
self.params['tags'])
|
||||
|
||||
@mock.patch('nova.objects.Instance.create')
|
||||
def test_bury_in_cell0_already_mapped(self, mock_inst_create):
|
||||
"""Tests a scenario where the instance mapping is already mapped to a
|
||||
cell when we attempt to bury the instance in cell0.
|
||||
"""
|
||||
build_request = self.params['build_requests'][0]
|
||||
# Simulate MQ split brain craziness by updating the instance mapping
|
||||
# to point at cell1.
|
||||
inst_mapping = objects.InstanceMapping.get_by_instance_uuid(
|
||||
self.ctxt, build_request.instance_uuid)
|
||||
inst_mapping.cell_mapping = self.cell_mappings['cell1']
|
||||
inst_mapping.save()
|
||||
# Now attempt to bury the instance in cell0.
|
||||
with mock.patch.object(inst_mapping, 'save') as mock_inst_map_save:
|
||||
self.conductor._bury_in_cell0(
|
||||
self.ctxt, self.params['request_specs'][0],
|
||||
exc.NoValidHost(reason='idk'), build_requests=[build_request])
|
||||
# We should have exited without creating the instance in cell0 nor
|
||||
# should the instance mapping have been updated to point at cell0.
|
||||
mock_inst_create.assert_not_called()
|
||||
mock_inst_map_save.assert_not_called()
|
||||
# And we should have logged an error.
|
||||
self.assertIn('When attempting to bury instance in cell0, the '
|
||||
'instance is already mapped to cell',
|
||||
self.stdlog.logger.output)
|
||||
|
||||
def test_reset(self):
|
||||
with mock.patch('nova.compute.rpcapi.ComputeAPI') as mock_rpc:
|
||||
old_rpcapi = self.conductor_manager.compute_rpcapi
|
||||
|
|
Loading…
Reference in New Issue