compute: Ensure pre-migrating instances are destroyed during init_host
Previously _destroy_evacuated_instances would not remove instances associated with evacuation migration records in a pre-migrating state. This could lead to a race between the original source host and the new destination if the source returned early, calling init_instance during the evacuation process. This change now includes pre-migrating migration records when looking for active evacuations. Additionally the dict of evacuating instances is then returned to init_host and used to skip running init_instance against such instances ensuring no race occurs between the two computes. Closes-bug: #1764883 Change-Id: I379678dfdb2609f12a572d4f99c8e9da4deab803
This commit is contained in:
parent
b9213398c7
commit
c4988cdabf
|
@ -624,10 +624,11 @@ class ComputeManager(manager.Manager):
|
|||
While nova-compute was down, the instances running on it could be
|
||||
evacuated to another host. This method looks for evacuation migration
|
||||
records where this is the source host and which were either started
|
||||
(accepted) or complete (done). From those migration records, local
|
||||
instances reported by the hypervisor are compared to the instances
|
||||
for the migration records and those local guests are destroyed, along
|
||||
with instance allocation records in Placement for this node.
|
||||
(accepted), in-progress (pre-migrating) or migrated (done). From those
|
||||
migration records, local instances reported by the hypervisor are
|
||||
compared to the instances for the migration records and those local
|
||||
guests are destroyed, along with instance allocation records in
|
||||
Placement for this node.
|
||||
"""
|
||||
filters = {
|
||||
'source_compute': self.host,
|
||||
|
@ -635,7 +636,13 @@ class ComputeManager(manager.Manager):
|
|||
# included in case the source node comes back up while instances
|
||||
# are being evacuated to another host. We don't want the same
|
||||
# instance being reported from multiple hosts.
|
||||
'status': ['accepted', 'done'],
|
||||
# NOTE(lyarwood): pre-migrating is also included here as the
|
||||
# source compute can come back online shortly after the RT
|
||||
# claims on the destination that in-turn moves the migration to
|
||||
# pre-migrating. If the evacuate fails on the destination host,
|
||||
# the user can rebuild the instance (in ERROR state) on the source
|
||||
# host.
|
||||
'status': ['accepted', 'pre-migrating', 'done'],
|
||||
'migration_type': 'evacuation',
|
||||
}
|
||||
with utils.temporary_mutation(context, read_deleted='yes'):
|
||||
|
@ -698,6 +705,7 @@ class ComputeManager(manager.Manager):
|
|||
|
||||
migration.status = 'completed'
|
||||
migration.save()
|
||||
return evacuations
|
||||
|
||||
def _is_instance_storage_shared(self, context, instance, host=None):
|
||||
shared_storage = True
|
||||
|
@ -1133,9 +1141,14 @@ class ComputeManager(manager.Manager):
|
|||
|
||||
try:
|
||||
# checking that instance was not already evacuated to other host
|
||||
self._destroy_evacuated_instances(context)
|
||||
evacuated_instances = self._destroy_evacuated_instances(context)
|
||||
|
||||
# Initialise instances on the host that are not evacuating
|
||||
for instance in instances:
|
||||
self._init_instance(context, instance)
|
||||
if (not evacuated_instances or
|
||||
instance.uuid not in evacuated_instances):
|
||||
self._init_instance(context, instance)
|
||||
|
||||
finally:
|
||||
if CONF.defer_iptables_apply:
|
||||
self.driver.filter_defer_apply_off()
|
||||
|
|
|
@ -25,13 +25,9 @@ class TestEvacuationWithSourceReturningDuringRebuild(
|
|||
test.TestCase, integrated_helpers.InstanceHelperMixin):
|
||||
"""Assert the behaviour of evacuating instances when the src returns early.
|
||||
|
||||
This test asserts that evacuating instances end up in an ERROR state on the
|
||||
source when that host comes back online during an evacuation while the
|
||||
migration record is in a pre-migrating state.
|
||||
|
||||
This currently leads to a race between the source initialising the instance
|
||||
and destination claim during the rebuild, both of which attempt to update
|
||||
the underlying task_state of the instance.
|
||||
This test asserts that evacuating instances end up in an ACTIVE state on
|
||||
the destination even when the source host comes back online during an
|
||||
evacuation while the migration record is in a pre-migrating state.
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
|
@ -113,8 +109,15 @@ class TestEvacuationWithSourceReturningDuringRebuild(
|
|||
# Start evacuating the instance from the source_host
|
||||
self.api.post_server_action(server['id'], {'evacuate': {}})
|
||||
|
||||
# FIXME(lyarwood): Assert that the evacuation fails at present with the
|
||||
# instance remaining on the source in an ERROR state.
|
||||
self._wait_for_state_change(self.api, server, 'ERROR')
|
||||
# Wait for the instance to go into an ACTIVE state
|
||||
self._wait_for_state_change(self.api, server, 'ACTIVE')
|
||||
server = self.api.get_server(server['id'])
|
||||
self.assertEqual(self.source_compute, server['OS-EXT-SRV-ATTR:host'])
|
||||
host = server['OS-EXT-SRV-ATTR:host']
|
||||
migrations = self.api.get_migrations()
|
||||
|
||||
# Assert that we have a single `done` migration record after the evac
|
||||
self.assertEqual(1, len(migrations))
|
||||
self.assertEqual('done', migrations[0]['status'])
|
||||
|
||||
# Assert that the instance is now on the dest
|
||||
self.assertNotEqual(self.source_compute, host)
|
||||
|
|
|
@ -7500,7 +7500,10 @@ class ComputeTestCase(BaseTestCase,
|
|||
|
||||
mock_get_filter.assert_called_once_with(fake_context,
|
||||
{'source_compute': self.compute.host,
|
||||
'status': ['accepted', 'done'],
|
||||
'status': [
|
||||
'accepted',
|
||||
'pre-migrating',
|
||||
'done'],
|
||||
'migration_type': 'evacuation'})
|
||||
mock_get_inst.assert_called_once_with(fake_context)
|
||||
mock_get_nw.assert_called_once_with(fake_context, evacuated_instance)
|
||||
|
|
|
@ -766,6 +766,36 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
mock.ANY, mock.ANY, mock.ANY)
|
||||
mock_save.assert_called_once_with()
|
||||
|
||||
@mock.patch.object(context, 'get_admin_context')
|
||||
@mock.patch.object(objects.InstanceList, 'get_by_host')
|
||||
@mock.patch.object(fake_driver.FakeDriver, 'init_host')
|
||||
@mock.patch('nova.compute.manager.ComputeManager._init_instance')
|
||||
@mock.patch('nova.compute.manager.ComputeManager.'
|
||||
'_destroy_evacuated_instances')
|
||||
def test_init_host_with_in_progress_evacuations(self, mock_destroy_evac,
|
||||
mock_init_instance, mock_init_host, mock_host_get,
|
||||
mock_admin_ctxt):
|
||||
"""Assert that init_instance is not called for instances that are
|
||||
evacuating from the host during init_host.
|
||||
"""
|
||||
active_instance = fake_instance.fake_instance_obj(
|
||||
self.context, host=self.compute.host, uuid=uuids.active_instance)
|
||||
evacuating_instance = fake_instance.fake_instance_obj(
|
||||
self.context, host=self.compute.host, uuid=uuids.evac_instance)
|
||||
instance_list = objects.InstanceList(self.context,
|
||||
objects=[active_instance, evacuating_instance])
|
||||
|
||||
mock_host_get.return_value = instance_list
|
||||
mock_admin_ctxt.return_value = self.context
|
||||
mock_destroy_evac.return_value = {
|
||||
uuids.evac_instance: evacuating_instance
|
||||
}
|
||||
|
||||
self.compute.init_host()
|
||||
|
||||
mock_init_instance.assert_called_once_with(
|
||||
self.context, active_instance)
|
||||
|
||||
def test_init_instance_with_binding_failed_vif_type(self):
|
||||
# this instance will plug a 'binding_failed' vif
|
||||
instance = fake_instance.fake_instance_obj(
|
||||
|
|
Loading…
Reference in New Issue