Make evacuate leave a record for the source compute host to process

This makes the evacuate process leave a record in the database describing
the evacuation and the host from which it was performed. The compute
manager now uses that breadcrumb trail to decide whether it should delete
locally-running instances, instead of relying on a hostname mismatch
alone.
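
In short, the API now writes a Migration row that the source compute
consumes when it comes back. A condensed sketch of the handshake, using
the object calls from the diff below (the helper function names here are
illustrative, not part of the change):

    from nova import objects

    def leave_evacuation_tombstone(context, instance, dest_host=None):
        # API side: record the evacuation and the host it left, so the
        # (possibly dead) source compute can clean up on restart.
        migration = objects.Migration(context,
                                      source_compute=instance.host,
                                      source_node=instance.node,
                                      instance_uuid=instance.uuid,
                                      status='accepted',
                                      migration_type='evacuation')
        if dest_host:
            migration.dest_compute = dest_host
        migration.create()

    def find_evacuations_from(context, host):
        # Compute side: query the breadcrumb records instead of guessing
        # from a hostname mismatch; each hit is an instance that is safe
        # to destroy locally once its disks are handled.
        filters = {'source_compute': host,
                   'status': 'accepted',
                   'migration_type': 'evacuation'}
        return objects.MigrationList.get_by_filters(context, filters)

The manager marks each record 'completed' after destroying the local
instance, so the cleanup runs at most once per evacuation.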

Since this queries migrations by migration_type, I added a unit test to the
test_db_api module to prove that filter actually works.
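
For reference, a minimal sketch of the filter that test exercises at the
db API layer (the 'special'/'host9' values mirror the fixtures in the new
test further down):

    # Two records share status and host; only the evacuation matches.
    filters = {'status': 'special', 'host': 'host9',
               'migration_type': 'evacuation', 'hidden': False}
    migrations = db.migration_get_all_by_filters(ctxt, filters)
    assert len(migrations) == 1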

DocImpact: Deprecates the workarounds/destroy_after_evacuate config option

Related to blueprint robustify-evacuation
Closes-Bug: #1471887
Change-Id: I972c224dfb954a9f06e221c9235ee6e4889c2619
Author: Dan Smith
Date: 2015-05-14 15:52:05 -07:00
commit d9c696acf8
parent 91da50d0c7
6 changed files with 115 additions and 98 deletions


@@ -3272,6 +3272,18 @@ class API(base.Base):
instance.save(expected_task_state=[None])
self._record_action_start(context, instance, instance_actions.EVACUATE)
# NOTE(danms): Create this as a tombstone for the source compute
# to find and cleanup. No need to pass it anywhere else.
migration = objects.Migration(context,
source_compute=instance.host,
source_node=instance.node,
instance_uuid=instance.uuid,
status='accepted',
migration_type='evacuation')
if host:
migration.dest_compute = host
migration.create()
return self.compute_task_api.rebuild_instance(context,
instance=instance,
new_pass=admin_password,


@@ -788,61 +788,44 @@ class ComputeManager(manager.Manager):
the MIGRATING, RESIZE_MIGRATING, RESIZE_MIGRATED, RESIZE_FINISH
task state or RESIZED vm state.
"""
our_host = self.host
filters = {
'source_compute': self.host,
'status': 'accepted',
'migration_type': 'evacuation',
}
evacuations = objects.MigrationList.get_by_filters(context, filters)
if not evacuations:
return
evacuations = {mig.instance_uuid: mig for mig in evacuations}
filters = {'deleted': False}
local_instances = self._get_instances_on_driver(context, filters)
for instance in local_instances:
if instance.host != our_host:
if (instance.task_state in [task_states.MIGRATING,
task_states.RESIZE_MIGRATING,
task_states.RESIZE_MIGRATED,
task_states.RESIZE_FINISH]
or instance.vm_state in [vm_states.RESIZED]):
LOG.debug('Will not delete instance as its host ('
'%(instance_host)s) is not equal to our '
'host (%(our_host)s) but its task state is '
'(%(task_state)s) and vm state is '
'(%(vm_state)s)',
{'instance_host': instance.host,
'our_host': our_host,
'task_state': instance.task_state,
'vm_state': instance.vm_state},
instance=instance)
continue
if not CONF.workarounds.destroy_after_evacuate:
LOG.warning(_LW('Instance %(uuid)s appears to have been '
'evacuated from this host to %(host)s. '
'Not destroying it locally due to '
'config setting '
'"workarounds.destroy_after_evacuate". '
'If this is not correct, enable that '
'option and restart nova-compute.'),
{'uuid': instance.uuid,
'host': instance.host})
continue
LOG.info(_LI('Deleting instance as its host ('
'%(instance_host)s) is not equal to our '
'host (%(our_host)s).'),
{'instance_host': instance.host,
'our_host': our_host}, instance=instance)
try:
network_info = self.network_api.get_instance_nw_info(
context, instance)
bdi = self._get_instance_block_device_info(context,
instance)
destroy_disks = not (self._is_instance_storage_shared(
context, instance))
except exception.InstanceNotFound:
network_info = network_model.NetworkInfo()
bdi = {}
LOG.info(_LI('Instance has been marked deleted already, '
'removing it from the hypervisor.'),
instance=instance)
# always destroy disks if the instance was deleted
destroy_disks = True
self.driver.destroy(context, instance,
network_info,
bdi, destroy_disks)
evacuated = [inst for inst in local_instances
if inst.uuid in evacuations]
for instance in evacuated:
migration = evacuations[instance.uuid]
LOG.info(_LI('Deleting instance as it has been evacuated from '
'this host'), instance=instance)
try:
network_info = self.network_api.get_instance_nw_info(
context, instance)
bdi = self._get_instance_block_device_info(context,
instance)
destroy_disks = not (self._is_instance_storage_shared(
context, instance))
except exception.InstanceNotFound:
network_info = network_model.NetworkInfo()
bdi = {}
LOG.info(_LI('Instance has been marked deleted already, '
'removing it from the hypervisor.'),
instance=instance)
# always destroy disks if the instance was deleted
destroy_disks = True
self.driver.destroy(context, instance,
network_info,
bdi, destroy_disks)
migration.status = 'completed'
migration.save()
def _is_instance_storage_shared(self, context, instance, host=None):
shared_storage = True


@@ -6520,7 +6520,10 @@ class ComputeTestCase(BaseTestCase):
instance = self._create_fake_instance_obj(params)
self.compute._instance_update(self.context, instance.uuid, vcpus=4)
def test_destroy_evacuated_instance_on_shared_storage(self):
@mock.patch('nova.objects.MigrationList.get_by_filters')
@mock.patch('nova.objects.Migration.save')
def test_destroy_evacuated_instance_on_shared_storage(self, mock_save,
mock_get):
fake_context = context.get_admin_context()
# instances in central db
@@ -6538,6 +6541,9 @@ class ComputeTestCase(BaseTestCase):
evacuated_instance = self._create_fake_instance_obj(
{'host': 'otherhost'})
migration = objects.Migration(instance_uuid=evacuated_instance.uuid)
mock_get.return_value = [migration]
instances.append(evacuated_instance)
self.mox.StubOutWithMock(self.compute,
@@ -6565,8 +6571,15 @@ class ComputeTestCase(BaseTestCase):
self.mox.ReplayAll()
self.compute._destroy_evacuated_instances(fake_context)
mock_get.assert_called_once_with(fake_context,
{'source_compute': self.compute.host,
'status': 'accepted',
'migration_type': 'evacuation'})
def test_destroy_evacuated_instance_with_disks(self):
@mock.patch('nova.objects.MigrationList.get_by_filters')
@mock.patch('nova.objects.Migration.save')
def test_destroy_evacuated_instance_with_disks(self, mock_save,
mock_get):
fake_context = context.get_admin_context()
# instances in central db
@@ -6584,6 +6597,9 @@ class ComputeTestCase(BaseTestCase):
evacuated_instance = self._create_fake_instance_obj(
{'host': 'otherhost'})
migration = objects.Migration(instance_uuid=evacuated_instance.uuid)
mock_get.return_value = [migration]
instances.append(evacuated_instance)
self.mox.StubOutWithMock(self.compute,
@@ -6621,7 +6637,10 @@ class ComputeTestCase(BaseTestCase):
self.mox.ReplayAll()
self.compute._destroy_evacuated_instances(fake_context)
def test_destroy_evacuated_instance_not_implemented(self):
@mock.patch('nova.objects.MigrationList.get_by_filters')
@mock.patch('nova.objects.Migration.save')
def test_destroy_evacuated_instance_not_implemented(self, mock_save,
mock_get):
fake_context = context.get_admin_context()
# instances in central db
@@ -6639,6 +6658,9 @@ class ComputeTestCase(BaseTestCase):
evacuated_instance = self._create_fake_instance_obj(
{'host': 'otherhost'})
migration = objects.Migration(instance_uuid=evacuated_instance.uuid)
mock_get.return_value = [migration]
instances.append(evacuated_instance)
self.mox.StubOutWithMock(self.compute,
@@ -9791,6 +9813,11 @@ class ComputeAPITestCase(BaseTestCase):
instance.refresh()
self.assertEqual(instance.task_state, task_states.REBUILDING)
self.assertEqual(instance.host, 'fake_dest_host')
migs = objects.MigrationList.get_by_filters(
self.context, {'source_host': 'fake_host'})
self.assertEqual(1, len(migs))
self.assertEqual('fake_host', migs[0].source_compute)
self.assertEqual('accepted', migs[0].status)
def test_fail_evacuate_from_non_existing_host(self):
inst = {}


@@ -401,11 +401,13 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
self.mox.UnsetStubs()
@mock.patch('nova.objects.InstanceList')
def test_cleanup_host(self, mock_instance_list):
@mock.patch('nova.objects.MigrationList.get_by_filters')
def test_cleanup_host(self, mock_miglist_get, mock_instance_list):
# just testing whether the cleanup_host method
# when fired will invoke the underlying driver's
# equivalent method.
mock_miglist_get.return_value = []
mock_instance_list.get_by_host.return_value = []
with mock.patch.object(self.compute, 'driver') as mock_driver:
@@ -426,12 +428,16 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
self.compute.init_virt_events()
self.assertFalse(mock_register.called)
def test_init_host_with_deleted_migration(self):
@mock.patch('nova.objects.MigrationList.get_by_filters')
@mock.patch('nova.objects.Migration.save')
def test_init_host_with_evacuated_instance(self, mock_save, mock_mig_get):
our_host = self.compute.host
not_our_host = 'not-' + our_host
deleted_instance = fake_instance.fake_instance_obj(
self.context, host=not_our_host, uuid='fake-uuid')
migration = objects.Migration(instance_uuid=deleted_instance.uuid)
mock_mig_get.return_value = [migration]
self.mox.StubOutWithMock(self.compute.driver, 'init_host')
self.mox.StubOutWithMock(self.compute.driver, 'destroy')
@@ -2054,13 +2060,12 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
self._do_test_set_admin_password_driver_error(
exc, vm_states.ACTIVE, None, expected_exception)
def _test_init_host_with_partial_migration(self, task_state=None,
vm_state=vm_states.ACTIVE):
def test_destroy_evacuated_instances(self):
our_host = self.compute.host
instance_1 = objects.Instance(self.context)
instance_1.uuid = 'foo'
instance_1.task_state = task_state
instance_1.vm_state = vm_state
instance_1.task_state = None
instance_1.vm_state = vm_states.ACTIVE
instance_1.host = 'not-' + our_host
instance_2 = objects.Instance(self.context)
instance_2.uuid = 'bar'
@@ -2068,6 +2073,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
instance_2.vm_state = vm_states.ACTIVE
instance_2.host = 'not-' + our_host
# Only instance 2 has a migration record
migration = objects.Migration(instance_uuid=instance_2.uuid)
with contextlib.nested(
mock.patch.object(self.compute, '_get_instances_on_driver',
return_value=[instance_1,
@@ -2078,45 +2086,19 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
return_value={}),
mock.patch.object(self.compute, '_is_instance_storage_shared',
return_value=False),
mock.patch.object(self.compute.driver, 'destroy')
mock.patch.object(self.compute.driver, 'destroy'),
mock.patch('nova.objects.MigrationList.get_by_filters'),
mock.patch('nova.objects.Migration.save')
) as (_get_instances_on_driver, get_instance_nw_info,
_get_instance_block_device_info, _is_instance_storage_shared,
destroy):
destroy, migration_list, migration_save):
migration_list.return_value = [migration]
self.compute._destroy_evacuated_instances(self.context)
# Only instance 2 should be deleted. Instance 1 is still running
# here, but no migration from our host exists, so ignore it
destroy.assert_called_once_with(self.context, instance_2, None,
{}, True)
def test_init_host_with_partial_migration_migrating(self):
self._test_init_host_with_partial_migration(
task_state=task_states.MIGRATING)
def test_init_host_with_partial_migration_resize_migrating(self):
self._test_init_host_with_partial_migration(
task_state=task_states.RESIZE_MIGRATING)
def test_init_host_with_partial_migration_resize_migrated(self):
self._test_init_host_with_partial_migration(
task_state=task_states.RESIZE_MIGRATED)
def test_init_host_with_partial_migration_finish_resize(self):
self._test_init_host_with_partial_migration(
task_state=task_states.RESIZE_FINISH)
def test_init_host_with_partial_migration_resized(self):
self._test_init_host_with_partial_migration(
vm_state=vm_states.RESIZED)
@mock.patch('nova.compute.manager.ComputeManager._get_instances_on_driver')
def test_evacuate_disabled(self, mock_giod):
self.flags(destroy_after_evacuate=False, group='workarounds')
inst = mock.MagicMock()
inst.uuid = 'foo'
inst.host = self.compute.host + '-alt'
mock_giod.return_value = [inst]
with mock.patch.object(self.compute.driver, 'destroy') as mock_d:
self.compute._destroy_evacuated_instances(mock.MagicMock())
self.assertFalse(mock_d.called)
@mock.patch('nova.compute.manager.ComputeManager.'
'_destroy_evacuated_instances')
@mock.patch('nova.compute.manager.LOG')


@@ -1248,7 +1248,7 @@ class MigrationTestCase(test.TestCase):
def _create(self, status='migrating', source_compute='host1',
source_node='a', dest_compute='host2', dest_node='b',
system_metadata=None):
system_metadata=None, migration_type=None):
values = {'host': source_compute}
instance = db.instance_create(self.ctxt, values)
@@ -1258,7 +1258,8 @@ class MigrationTestCase(test.TestCase):
values = {'status': status, 'source_compute': source_compute,
'source_node': source_node, 'dest_compute': dest_compute,
'dest_node': dest_node, 'instance_uuid': instance['uuid']}
'dest_node': dest_node, 'instance_uuid': instance['uuid'],
'migration_type': migration_type}
db.migration_create(self.ctxt, values)
def _assert_in_progress(self, migrations):
@@ -1312,6 +1313,16 @@ class MigrationTestCase(test.TestCase):
hosts = [migration['source_compute'], migration['dest_compute']]
self.assertIn(filters["host"], hosts)
def test_get_migrations_by_filters_with_type(self):
self._create(status="special", source_compute="host9",
migration_type="evacuation")
self._create(status="special", source_compute="host9",
migration_type="live-migration")
filters = {"status": "special", "host": "host9",
"migration_type": "evacuation", "hidden": False}
migrations = db.migration_get_all_by_filters(self.ctxt, filters)
self.assertEqual(1, len(migrations))
def test_get_migrations_by_filters_source_compute(self):
filters = {'source_compute': 'host2'}
migrations = db.migration_get_all_by_filters(self.ctxt, filters)


@@ -110,7 +110,9 @@ workarounds_opts = [
'https://bugs.launchpad.net/nova/+bug/1334398'),
cfg.BoolOpt('destroy_after_evacuate',
default=True,
help='Whether to destroy instances on startup when we suspect '
deprecated_for_removal=True,
help='DEPRECATED: Whether to destroy '
'instances on startup when we suspect '
'they have previously been evacuated. This can result in '
'data loss if undesired. See '
'https://launchpad.net/bugs/1419785'),