Make evacuate leave a record for the source compute host to process
This makes the evacuate process leave a record in the database about the evacuation, and from which host it was being performed. Now the compute manager will use that breadcrumb trail to decide whether it should delete locally-running instances, instead of relying only on the hostname mismatch. Since this does a query by migration_type, I added a unit test to the test_db_api module to prove this actually works. DocImpact: Deprecates the workarounds/destroy_after_evacuate config option Related to blueprint robustify-evacuation Closes-Bug: #1471887 Change-Id: I972c224dfb954a9f06e221c9235ee6e4889c2619
This commit is contained in:
parent
91da50d0c7
commit
d9c696acf8
|
@ -3272,6 +3272,18 @@ class API(base.Base):
|
|||
instance.save(expected_task_state=[None])
|
||||
self._record_action_start(context, instance, instance_actions.EVACUATE)
|
||||
|
||||
# NOTE(danms): Create this as a tombstone for the source compute
|
||||
# to find and cleanup. No need to pass it anywhere else.
|
||||
migration = objects.Migration(context,
|
||||
source_compute=instance.host,
|
||||
source_node=instance.node,
|
||||
instance_uuid=instance.uuid,
|
||||
status='accepted',
|
||||
migration_type='evacuation')
|
||||
if host:
|
||||
migration.dest_compute = host
|
||||
migration.create()
|
||||
|
||||
return self.compute_task_api.rebuild_instance(context,
|
||||
instance=instance,
|
||||
new_pass=admin_password,
|
||||
|
|
|
@ -788,61 +788,44 @@ class ComputeManager(manager.Manager):
|
|||
the MIGRATING, RESIZE_MIGRATING, RESIZE_MIGRATED, RESIZE_FINISH
|
||||
task state or RESIZED vm state.
|
||||
"""
|
||||
our_host = self.host
|
||||
filters = {
|
||||
'source_compute': self.host,
|
||||
'status': 'accepted',
|
||||
'migration_type': 'evacuation',
|
||||
}
|
||||
evacuations = objects.MigrationList.get_by_filters(context, filters)
|
||||
if not evacuations:
|
||||
return
|
||||
evacuations = {mig.instance_uuid: mig for mig in evacuations}
|
||||
|
||||
filters = {'deleted': False}
|
||||
local_instances = self._get_instances_on_driver(context, filters)
|
||||
for instance in local_instances:
|
||||
if instance.host != our_host:
|
||||
if (instance.task_state in [task_states.MIGRATING,
|
||||
task_states.RESIZE_MIGRATING,
|
||||
task_states.RESIZE_MIGRATED,
|
||||
task_states.RESIZE_FINISH]
|
||||
or instance.vm_state in [vm_states.RESIZED]):
|
||||
LOG.debug('Will not delete instance as its host ('
|
||||
'%(instance_host)s) is not equal to our '
|
||||
'host (%(our_host)s) but its task state is '
|
||||
'(%(task_state)s) and vm state is '
|
||||
'(%(vm_state)s)',
|
||||
{'instance_host': instance.host,
|
||||
'our_host': our_host,
|
||||
'task_state': instance.task_state,
|
||||
'vm_state': instance.vm_state},
|
||||
instance=instance)
|
||||
continue
|
||||
if not CONF.workarounds.destroy_after_evacuate:
|
||||
LOG.warning(_LW('Instance %(uuid)s appears to have been '
|
||||
'evacuated from this host to %(host)s. '
|
||||
'Not destroying it locally due to '
|
||||
'config setting '
|
||||
'"workarounds.destroy_after_evacuate". '
|
||||
'If this is not correct, enable that '
|
||||
'option and restart nova-compute.'),
|
||||
{'uuid': instance.uuid,
|
||||
'host': instance.host})
|
||||
continue
|
||||
LOG.info(_LI('Deleting instance as its host ('
|
||||
'%(instance_host)s) is not equal to our '
|
||||
'host (%(our_host)s).'),
|
||||
{'instance_host': instance.host,
|
||||
'our_host': our_host}, instance=instance)
|
||||
try:
|
||||
network_info = self.network_api.get_instance_nw_info(
|
||||
context, instance)
|
||||
bdi = self._get_instance_block_device_info(context,
|
||||
instance)
|
||||
destroy_disks = not (self._is_instance_storage_shared(
|
||||
context, instance))
|
||||
except exception.InstanceNotFound:
|
||||
network_info = network_model.NetworkInfo()
|
||||
bdi = {}
|
||||
LOG.info(_LI('Instance has been marked deleted already, '
|
||||
'removing it from the hypervisor.'),
|
||||
instance=instance)
|
||||
# always destroy disks if the instance was deleted
|
||||
destroy_disks = True
|
||||
self.driver.destroy(context, instance,
|
||||
network_info,
|
||||
bdi, destroy_disks)
|
||||
evacuated = [inst for inst in local_instances
|
||||
if inst.uuid in evacuations]
|
||||
for instance in evacuated:
|
||||
migration = evacuations[instance.uuid]
|
||||
LOG.info(_LI('Deleting instance as it has been evacuated from '
|
||||
'this host'), instance=instance)
|
||||
try:
|
||||
network_info = self.network_api.get_instance_nw_info(
|
||||
context, instance)
|
||||
bdi = self._get_instance_block_device_info(context,
|
||||
instance)
|
||||
destroy_disks = not (self._is_instance_storage_shared(
|
||||
context, instance))
|
||||
except exception.InstanceNotFound:
|
||||
network_info = network_model.NetworkInfo()
|
||||
bdi = {}
|
||||
LOG.info(_LI('Instance has been marked deleted already, '
|
||||
'removing it from the hypervisor.'),
|
||||
instance=instance)
|
||||
# always destroy disks if the instance was deleted
|
||||
destroy_disks = True
|
||||
self.driver.destroy(context, instance,
|
||||
network_info,
|
||||
bdi, destroy_disks)
|
||||
migration.status = 'completed'
|
||||
migration.save()
|
||||
|
||||
def _is_instance_storage_shared(self, context, instance, host=None):
|
||||
shared_storage = True
|
||||
|
|
|
@ -6520,7 +6520,10 @@ class ComputeTestCase(BaseTestCase):
|
|||
instance = self._create_fake_instance_obj(params)
|
||||
self.compute._instance_update(self.context, instance.uuid, vcpus=4)
|
||||
|
||||
def test_destroy_evacuated_instance_on_shared_storage(self):
|
||||
@mock.patch('nova.objects.MigrationList.get_by_filters')
|
||||
@mock.patch('nova.objects.Migration.save')
|
||||
def test_destroy_evacuated_instance_on_shared_storage(self, mock_save,
|
||||
mock_get):
|
||||
fake_context = context.get_admin_context()
|
||||
|
||||
# instances in central db
|
||||
|
@ -6538,6 +6541,9 @@ class ComputeTestCase(BaseTestCase):
|
|||
evacuated_instance = self._create_fake_instance_obj(
|
||||
{'host': 'otherhost'})
|
||||
|
||||
migration = objects.Migration(instance_uuid=evacuated_instance.uuid)
|
||||
mock_get.return_value = [migration]
|
||||
|
||||
instances.append(evacuated_instance)
|
||||
|
||||
self.mox.StubOutWithMock(self.compute,
|
||||
|
@ -6565,8 +6571,15 @@ class ComputeTestCase(BaseTestCase):
|
|||
|
||||
self.mox.ReplayAll()
|
||||
self.compute._destroy_evacuated_instances(fake_context)
|
||||
mock_get.assert_called_once_with(fake_context,
|
||||
{'source_compute': self.compute.host,
|
||||
'status': 'accepted',
|
||||
'migration_type': 'evacuation'})
|
||||
|
||||
def test_destroy_evacuated_instance_with_disks(self):
|
||||
@mock.patch('nova.objects.MigrationList.get_by_filters')
|
||||
@mock.patch('nova.objects.Migration.save')
|
||||
def test_destroy_evacuated_instance_with_disks(self, mock_save,
|
||||
mock_get):
|
||||
fake_context = context.get_admin_context()
|
||||
|
||||
# instances in central db
|
||||
|
@ -6584,6 +6597,9 @@ class ComputeTestCase(BaseTestCase):
|
|||
evacuated_instance = self._create_fake_instance_obj(
|
||||
{'host': 'otherhost'})
|
||||
|
||||
migration = objects.Migration(instance_uuid=evacuated_instance.uuid)
|
||||
mock_get.return_value = [migration]
|
||||
|
||||
instances.append(evacuated_instance)
|
||||
|
||||
self.mox.StubOutWithMock(self.compute,
|
||||
|
@ -6621,7 +6637,10 @@ class ComputeTestCase(BaseTestCase):
|
|||
self.mox.ReplayAll()
|
||||
self.compute._destroy_evacuated_instances(fake_context)
|
||||
|
||||
def test_destroy_evacuated_instance_not_implemented(self):
|
||||
@mock.patch('nova.objects.MigrationList.get_by_filters')
|
||||
@mock.patch('nova.objects.Migration.save')
|
||||
def test_destroy_evacuated_instance_not_implemented(self, mock_save,
|
||||
mock_get):
|
||||
fake_context = context.get_admin_context()
|
||||
|
||||
# instances in central db
|
||||
|
@ -6639,6 +6658,9 @@ class ComputeTestCase(BaseTestCase):
|
|||
evacuated_instance = self._create_fake_instance_obj(
|
||||
{'host': 'otherhost'})
|
||||
|
||||
migration = objects.Migration(instance_uuid=evacuated_instance.uuid)
|
||||
mock_get.return_value = [migration]
|
||||
|
||||
instances.append(evacuated_instance)
|
||||
|
||||
self.mox.StubOutWithMock(self.compute,
|
||||
|
@ -9791,6 +9813,11 @@ class ComputeAPITestCase(BaseTestCase):
|
|||
instance.refresh()
|
||||
self.assertEqual(instance.task_state, task_states.REBUILDING)
|
||||
self.assertEqual(instance.host, 'fake_dest_host')
|
||||
migs = objects.MigrationList.get_by_filters(
|
||||
self.context, {'source_host': 'fake_host'})
|
||||
self.assertEqual(1, len(migs))
|
||||
self.assertEqual('fake_host', migs[0].source_compute)
|
||||
self.assertEqual('accepted', migs[0].status)
|
||||
|
||||
def test_fail_evacuate_from_non_existing_host(self):
|
||||
inst = {}
|
||||
|
|
|
@ -401,11 +401,13 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
self.mox.UnsetStubs()
|
||||
|
||||
@mock.patch('nova.objects.InstanceList')
|
||||
def test_cleanup_host(self, mock_instance_list):
|
||||
@mock.patch('nova.objects.MigrationList.get_by_filters')
|
||||
def test_cleanup_host(self, mock_miglist_get, mock_instance_list):
|
||||
# just testing whether the cleanup_host method
|
||||
# when fired will invoke the underlying driver's
|
||||
# equivalent method.
|
||||
|
||||
mock_miglist_get.return_value = []
|
||||
mock_instance_list.get_by_host.return_value = []
|
||||
|
||||
with mock.patch.object(self.compute, 'driver') as mock_driver:
|
||||
|
@ -426,12 +428,16 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
self.compute.init_virt_events()
|
||||
self.assertFalse(mock_register.called)
|
||||
|
||||
def test_init_host_with_deleted_migration(self):
|
||||
@mock.patch('nova.objects.MigrationList.get_by_filters')
|
||||
@mock.patch('nova.objects.Migration.save')
|
||||
def test_init_host_with_evacuated_instance(self, mock_save, mock_mig_get):
|
||||
our_host = self.compute.host
|
||||
not_our_host = 'not-' + our_host
|
||||
|
||||
deleted_instance = fake_instance.fake_instance_obj(
|
||||
self.context, host=not_our_host, uuid='fake-uuid')
|
||||
migration = objects.Migration(instance_uuid=deleted_instance.uuid)
|
||||
mock_mig_get.return_value = [migration]
|
||||
|
||||
self.mox.StubOutWithMock(self.compute.driver, 'init_host')
|
||||
self.mox.StubOutWithMock(self.compute.driver, 'destroy')
|
||||
|
@ -2054,13 +2060,12 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
self._do_test_set_admin_password_driver_error(
|
||||
exc, vm_states.ACTIVE, None, expected_exception)
|
||||
|
||||
def _test_init_host_with_partial_migration(self, task_state=None,
|
||||
vm_state=vm_states.ACTIVE):
|
||||
def test_destroy_evacuated_instances(self):
|
||||
our_host = self.compute.host
|
||||
instance_1 = objects.Instance(self.context)
|
||||
instance_1.uuid = 'foo'
|
||||
instance_1.task_state = task_state
|
||||
instance_1.vm_state = vm_state
|
||||
instance_1.task_state = None
|
||||
instance_1.vm_state = vm_states.ACTIVE
|
||||
instance_1.host = 'not-' + our_host
|
||||
instance_2 = objects.Instance(self.context)
|
||||
instance_2.uuid = 'bar'
|
||||
|
@ -2068,6 +2073,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
instance_2.vm_state = vm_states.ACTIVE
|
||||
instance_2.host = 'not-' + our_host
|
||||
|
||||
# Only instance 2 has a migration record
|
||||
migration = objects.Migration(instance_uuid=instance_2.uuid)
|
||||
|
||||
with contextlib.nested(
|
||||
mock.patch.object(self.compute, '_get_instances_on_driver',
|
||||
return_value=[instance_1,
|
||||
|
@ -2078,45 +2086,19 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
return_value={}),
|
||||
mock.patch.object(self.compute, '_is_instance_storage_shared',
|
||||
return_value=False),
|
||||
mock.patch.object(self.compute.driver, 'destroy')
|
||||
mock.patch.object(self.compute.driver, 'destroy'),
|
||||
mock.patch('nova.objects.MigrationList.get_by_filters'),
|
||||
mock.patch('nova.objects.Migration.save')
|
||||
) as (_get_instances_on_driver, get_instance_nw_info,
|
||||
_get_instance_block_device_info, _is_instance_storage_shared,
|
||||
destroy):
|
||||
destroy, migration_list, migration_save):
|
||||
migration_list.return_value = [migration]
|
||||
self.compute._destroy_evacuated_instances(self.context)
|
||||
# Only instance 2 should be deleted. Instance 1 is still running
|
||||
# here, but no migration from our host exists, so ignore it
|
||||
destroy.assert_called_once_with(self.context, instance_2, None,
|
||||
{}, True)
|
||||
|
||||
def test_init_host_with_partial_migration_migrating(self):
|
||||
self._test_init_host_with_partial_migration(
|
||||
task_state=task_states.MIGRATING)
|
||||
|
||||
def test_init_host_with_partial_migration_resize_migrating(self):
|
||||
self._test_init_host_with_partial_migration(
|
||||
task_state=task_states.RESIZE_MIGRATING)
|
||||
|
||||
def test_init_host_with_partial_migration_resize_migrated(self):
|
||||
self._test_init_host_with_partial_migration(
|
||||
task_state=task_states.RESIZE_MIGRATED)
|
||||
|
||||
def test_init_host_with_partial_migration_finish_resize(self):
|
||||
self._test_init_host_with_partial_migration(
|
||||
task_state=task_states.RESIZE_FINISH)
|
||||
|
||||
def test_init_host_with_partial_migration_resized(self):
|
||||
self._test_init_host_with_partial_migration(
|
||||
vm_state=vm_states.RESIZED)
|
||||
|
||||
@mock.patch('nova.compute.manager.ComputeManager._get_instances_on_driver')
|
||||
def test_evacuate_disabled(self, mock_giod):
|
||||
self.flags(destroy_after_evacuate=False, group='workarounds')
|
||||
inst = mock.MagicMock()
|
||||
inst.uuid = 'foo'
|
||||
inst.host = self.compute.host + '-alt'
|
||||
mock_giod.return_value = [inst]
|
||||
with mock.patch.object(self.compute.driver, 'destroy') as mock_d:
|
||||
self.compute._destroy_evacuated_instances(mock.MagicMock())
|
||||
self.assertFalse(mock_d.called)
|
||||
|
||||
@mock.patch('nova.compute.manager.ComputeManager.'
|
||||
'_destroy_evacuated_instances')
|
||||
@mock.patch('nova.compute.manager.LOG')
|
||||
|
|
|
@ -1248,7 +1248,7 @@ class MigrationTestCase(test.TestCase):
|
|||
|
||||
def _create(self, status='migrating', source_compute='host1',
|
||||
source_node='a', dest_compute='host2', dest_node='b',
|
||||
system_metadata=None):
|
||||
system_metadata=None, migration_type=None):
|
||||
|
||||
values = {'host': source_compute}
|
||||
instance = db.instance_create(self.ctxt, values)
|
||||
|
@ -1258,7 +1258,8 @@ class MigrationTestCase(test.TestCase):
|
|||
|
||||
values = {'status': status, 'source_compute': source_compute,
|
||||
'source_node': source_node, 'dest_compute': dest_compute,
|
||||
'dest_node': dest_node, 'instance_uuid': instance['uuid']}
|
||||
'dest_node': dest_node, 'instance_uuid': instance['uuid'],
|
||||
'migration_type': migration_type}
|
||||
db.migration_create(self.ctxt, values)
|
||||
|
||||
def _assert_in_progress(self, migrations):
|
||||
|
@ -1312,6 +1313,16 @@ class MigrationTestCase(test.TestCase):
|
|||
hosts = [migration['source_compute'], migration['dest_compute']]
|
||||
self.assertIn(filters["host"], hosts)
|
||||
|
||||
def test_get_migrations_by_filters_with_type(self):
|
||||
self._create(status="special", source_compute="host9",
|
||||
migration_type="evacuation")
|
||||
self._create(status="special", source_compute="host9",
|
||||
migration_type="live-migration")
|
||||
filters = {"status": "special", "host": "host9",
|
||||
"migration_type": "evacuation", "hidden": False}
|
||||
migrations = db.migration_get_all_by_filters(self.ctxt, filters)
|
||||
self.assertEqual(1, len(migrations))
|
||||
|
||||
def test_get_migrations_by_filters_source_compute(self):
|
||||
filters = {'source_compute': 'host2'}
|
||||
migrations = db.migration_get_all_by_filters(self.ctxt, filters)
|
||||
|
|
|
@ -110,7 +110,9 @@ workarounds_opts = [
|
|||
'https://bugs.launchpad.net/nova/+bug/1334398'),
|
||||
cfg.BoolOpt('destroy_after_evacuate',
|
||||
default=True,
|
||||
help='Whether to destroy instances on startup when we suspect '
|
||||
deprecated_for_removal=True,
|
||||
help='DEPRECATED: Whether to destroy '
|
||||
'instances on startup when we suspect '
|
||||
'they have previously been evacuated. This can result in '
|
||||
'data loss if undesired. See '
|
||||
'https://launchpad.net/bugs/1419785'),
|
||||
|
|
Loading…
Reference in New Issue