Error anti-affinity violation on migrations

Error out the migrations (cold and live) whenever the
anti-affinity policy is violated. This addresses
violations that occur when multiple concurrent migrations
are requested.

Added detection on:
- prep_resize
- check_can_live_migrate_destination
- pre_live_migration

The improved detection method now locks based on the group uuid
and also takes other in-progress migrations into account.

Closes-bug: #1821755
Change-Id: I32e6214568bb57f7613ddeba2c2c46da0320fabc
Rodrigo Barbieri 2021-03-31 11:06:49 -03:00
parent da57eebc9e
commit 33c8af1f8c
3 changed files with 210 additions and 23 deletions
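For readers skimming the diff below, the core of the new detection is: serialize validation per server group, then count both the instances already on the host and the instances whose in-progress migrations target it before admitting another anti-affinity member. The following standalone sketch only illustrates that idea; the lookup callables are hypothetical stand-ins for the Nova object queries, and the real code in _validate_instance_group_policy below uses @utils.synchronized and versioned objects instead.

import threading
from collections import defaultdict

# One lock per server-group UUID, mirroring @utils.synchronized(group_uuid).
_group_locks = defaultdict(threading.Lock)


def validate_anti_affinity(group_uuid, host, instance_uuid,
                           get_group_members, get_instances_on_host,
                           get_incoming_migration_uuids,
                           max_server_per_host=1):
    # The three callables are stand-ins for the DB lookups Nova performs.
    with _group_locks[group_uuid]:
        on_host = set(get_instances_on_host(host))
        # Instances being migrated to this host; counting these is the new
        # part of the check.
        incoming = set(get_incoming_migration_uuids(host))
        # Group membership is re-read inside the lock so it is up to date.
        members = set(get_group_members(group_uuid))
        others = ((on_host | incoming) & members) - {instance_uuid}
        if len(others) >= max_server_per_host:
            raise RuntimeError('Anti-affinity instance group policy '
                               'was violated.')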

View File

@@ -1610,7 +1610,11 @@ class ComputeManager(manager.Manager):
return [_decode(f) for f in injected_files]
def _validate_instance_group_policy(self, context, instance,
scheduler_hints):
scheduler_hints=None):
if CONF.workarounds.disable_group_policy_check_upcall:
return
# NOTE(russellb) Instance group policy is enforced by the scheduler.
# However, there is a race condition with the enforcement of
# the policy. Since more than one instance may be scheduled at the
@@ -1619,29 +1623,63 @@ class ComputeManager(manager.Manager):
# multiple instances with an affinity policy could end up on different
# hosts. This is a validation step to make sure that starting the
# instance here doesn't violate the policy.
if scheduler_hints is not None:
# only go through here if scheduler_hints is provided, even if it
# is empty.
group_hint = scheduler_hints.get('group')
if not group_hint:
return
# The RequestSpec stores scheduler_hints as key=list pairs so we need
# to check the type on the value and pull the single entry out. The
# API request schema validates that the 'group' hint is a single value.
else:
# The RequestSpec stores scheduler_hints as key=list pairs so
# we need to check the type on the value and pull the single
# entry out. The API request schema validates that
# the 'group' hint is a single value.
if isinstance(group_hint, list):
group_hint = group_hint[0]
@utils.synchronized(group_hint)
def _do_validation(context, instance, group_hint):
group = objects.InstanceGroup.get_by_hint(context, group_hint)
else:
# TODO(ganso): a call to DB can be saved by adding request_spec
# to rpcapi payload of live_migration, pre_live_migration and
# check_can_live_migrate_destination
try:
group = objects.InstanceGroup.get_by_instance_uuid(
context, instance.uuid)
except exception.InstanceGroupNotFound:
return
@utils.synchronized(group['uuid'])
def _do_validation(context, instance, group):
if group.policy and 'anti-affinity' == group.policy:
# instances on host
instances_uuids = objects.InstanceList.get_uuids_by_host(
context, self.host)
ins_on_host = set(instances_uuids)
# instance param is just for logging, the nodename obtained is
# not actually related to the instance at all
nodename = self._get_nodename(instance)
# instances being migrated to host
migrations = (
objects.MigrationList.get_in_progress_by_host_and_node(
context, self.host, nodename))
migration_vm_uuids = set([mig['instance_uuid']
for mig in migrations])
total_instances = migration_vm_uuids | ins_on_host
# refresh group to get updated members within locked block
group = objects.InstanceGroup.get_by_uuid(context,
group['uuid'])
members = set(group.members)
# Determine the set of instance group members on this host
# which are not the instance in question. This is used to
# determine how many other members from the same anti-affinity
# group can be on this host.
members_on_host = ins_on_host & members - set([instance.uuid])
members_on_host = (total_instances & members -
set([instance.uuid]))
rules = group.rules
if rules and 'max_server_per_host' in rules:
max_server = rules['max_server_per_host']
@@ -1653,6 +1691,12 @@ class ComputeManager(manager.Manager):
raise exception.RescheduledException(
instance_uuid=instance.uuid,
reason=msg)
# NOTE(ganso): The check for affinity below does not work and it
# can easily be violated because the lock happens in different
# compute hosts.
# The only fix seems to be a DB lock to perform the check whenever
# setting the host field to an instance.
elif group.policy and 'affinity' == group.policy:
group_hosts = group.get_hosts(exclude=[instance.uuid])
if group_hosts and self.host not in group_hosts:
@@ -1661,8 +1705,7 @@ class ComputeManager(manager.Manager):
instance_uuid=instance.uuid,
reason=msg)
if not CONF.workarounds.disable_group_policy_check_upcall:
_do_validation(context, instance, group_hint)
_do_validation(context, instance, group)
def _log_original_error(self, exc_info, instance_uuid):
LOG.error('Error: %s', exc_info[1], instance_uuid=instance_uuid,
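Because the old and new lines interleave densely above, here is the group-lookup dispatch of the reworked method condensed into one place as a hypothetical _lookup_group helper (a simplified sketch, not a verbatim excerpt; it assumes the nova.objects and nova.exception imports already present in manager.py and omits the locking and policy checks):

def _lookup_group(self, context, instance, scheduler_hints=None):
    # Build/resize path: the caller passes the RequestSpec scheduler hints
    # and the group is resolved from the 'group' hint.
    if scheduler_hints is not None:
        group_hint = scheduler_hints.get('group')
        if not group_hint:
            return None
        if isinstance(group_hint, list):
            group_hint = group_hint[0]
        return objects.InstanceGroup.get_by_hint(context, group_hint)
    # Live-migration paths: no RequestSpec travels on those RPC calls, so
    # fall back to a lookup by instance UUID and treat "no group" as
    # nothing to validate.
    try:
        return objects.InstanceGroup.get_by_instance_uuid(
            context, instance.uuid)
    except exception.InstanceGroupNotFound:
        return None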
@@ -5174,10 +5217,24 @@ class ComputeManager(manager.Manager):
with self._error_out_instance_on_exception(
context, instance, instance_state=instance_state),\
errors_out_migration_ctxt(migration):
self._send_prep_resize_notifications(
context, instance, fields.NotificationPhase.START,
flavor)
try:
scheduler_hints = self._get_scheduler_hints(filter_properties,
request_spec)
# Error out if this host cannot accept the new instance due
# to anti-affinity. At this point the migration is already
# in-progress, so this is the definitive moment to abort due to
# the policy violation. Also, exploding here is covered by the
# cleanup methods in except block.
try:
self._validate_instance_group_policy(context, instance,
scheduler_hints)
except exception.RescheduledException as e:
raise exception.InstanceFaultRollback(inner_exception=e)
self._prep_resize(context, image, instance,
flavor, filter_properties,
node, migration, request_spec,
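The InstanceFaultRollback wrapping matters because of how the surrounding _error_out_instance_on_exception context manager treats that exception type: it restores the saved vm_state and re-raises the inner exception, which is exactly what the new prep_resize unit test further down asserts. Below is a behavioral sketch of that handling, assuming the nova exception module; it is not the actual Nova context manager, which does more (logging, additional exception types, a DB-level update).

import contextlib

from nova import exception


@contextlib.contextmanager
def error_out_instance_on_exception(instance, instance_state='active'):
    try:
        yield
    except exception.InstanceFaultRollback as error:
        # Roll the instance back to its pre-operation state and surface the
        # root cause (here: the RescheduledException) to the caller.
        instance.vm_state = instance_state
        instance.task_state = None
        instance.save()
        raise error.inner_exception
    except Exception:
        # Anything not wrapped in InstanceFaultRollback errors the instance
        # out instead.
        instance.vm_state = 'error'
        instance.save()
        raise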
@@ -7909,6 +7966,20 @@ class ComputeManager(manager.Manager):
:param limits: objects.SchedulerLimits object for this live migration.
:returns: a LiveMigrateData object (hypervisor-dependent)
"""
# Error out if this host cannot accept the new instance due
# to anti-affinity. This check at this moment is not very accurate, as
# multiple requests may be happening concurrently and miss the lock,
# but when it works it provides a better user experience by failing
# earlier. Also, it should be safe to explode here, error becomes
# NoValidHost and instance status remains ACTIVE.
try:
self._validate_instance_group_policy(ctxt, instance)
except exception.RescheduledException as e:
msg = ("Failed to validate instance group policy "
"due to: {}".format(e))
raise exception.MigrationPreCheckError(reason=msg)
src_compute_info = obj_base.obj_to_primitive(
self._get_compute_info(ctxt, instance.host))
dst_compute_info = obj_base.obj_to_primitive(
@@ -8048,6 +8119,13 @@ class ComputeManager(manager.Manager):
"""
LOG.debug('pre_live_migration data is %s', migrate_data)
# Error out if this host cannot accept the new instance due
# to anti-affinity. At this point the migration is already in-progress,
# so this is the definitive moment to abort due to the policy
# violation. Also, it should be safe to explode here. The instance
# status remains ACTIVE, migration status failed.
self._validate_instance_group_policy(context, instance)
migrate_data.old_vol_attachment_ids = {}
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
context, instance.uuid)
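One knock-on effect visible throughout the test diff that follows: since check_can_live_migrate_destination and pre_live_migration now look the server group up by instance UUID, pre-existing tests for those paths have to short-circuit that lookup. The recurring decorator below (the pattern used in the tests, shown here with a placeholder test name) simply makes the lookup report that the instance belongs to no group, so _validate_instance_group_policy returns early:

@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
    side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_some_existing_live_migration_path(self):
    # With the lookup raising InstanceGroupNotFound, the new policy check
    # is a no-op and the test keeps exercising only its original path.
    ...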

View File

@@ -3389,12 +3389,16 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
CONF.host, instance.uuid, graceful_exit=False)
return result
@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_check_can_live_migrate_destination_success(self):
self.useFixture(std_fixtures.MonkeyPatch(
'nova.network.neutron.API.supports_port_binding_extension',
lambda *args: True))
self._test_check_can_live_migrate_destination()
@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_check_can_live_migrate_destination_fail(self):
self.useFixture(std_fixtures.MonkeyPatch(
'nova.network.neutron.API.supports_port_binding_extension',
@@ -3404,7 +3408,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
self._test_check_can_live_migrate_destination,
do_raise=True)
def test_check_can_live_migrate_destination_contins_vifs(self):
@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_check_can_live_migrate_destination_contains_vifs(self):
self.useFixture(std_fixtures.MonkeyPatch(
'nova.network.neutron.API.supports_port_binding_extension',
lambda *args: True))
@@ -3412,6 +3418,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
self.assertIn('vifs', migrate_data)
self.assertIsNotNone(migrate_data.vifs)
@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_check_can_live_migrate_destination_no_binding_extended(self):
self.useFixture(std_fixtures.MonkeyPatch(
'nova.network.neutron.API.supports_port_binding_extension',
@@ -3419,18 +3427,40 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
migrate_data = self._test_check_can_live_migrate_destination()
self.assertNotIn('vifs', migrate_data)
@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_check_can_live_migrate_destination_src_numa_lm_false(self):
self.useFixture(std_fixtures.MonkeyPatch(
'nova.network.neutron.API.supports_port_binding_extension',
lambda *args: True))
self._test_check_can_live_migrate_destination(src_numa_lm=False)
@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_check_can_live_migrate_destination_src_numa_lm_true(self):
self.useFixture(std_fixtures.MonkeyPatch(
'nova.network.neutron.API.supports_port_binding_extension',
lambda *args: True))
self._test_check_can_live_migrate_destination(src_numa_lm=True)
@mock.patch.object(compute_utils, 'add_instance_fault_from_exc')
def test_check_can_live_migrate_destination_fail_group_policy(
self, mock_fail_db):
instance = fake_instance.fake_instance_obj(
self.context, host=self.compute.host, vm_state=vm_states.ACTIVE,
node='fake-node')
ex = exception.RescheduledException(
instance_uuid=instance.uuid, reason="policy violated")
with mock.patch.object(self.compute, '_validate_instance_group_policy',
side_effect=ex):
self.assertRaises(
exception.MigrationPreCheckError,
self.compute.check_can_live_migrate_destination,
self.context, instance, None, None, None, None)
def test_dest_can_numa_live_migrate(self):
positive_dest_check_data = objects.LibvirtLiveMigrateData(
dst_supports_numa_live_migration=True)
@@ -7495,7 +7525,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
def test_validate_policy_honors_workaround_disabled(self, mock_get):
instance = objects.Instance(uuid=uuids.instance)
hints = {'group': 'foo'}
mock_get.return_value = objects.InstanceGroup(policy=None)
mock_get.return_value = objects.InstanceGroup(policy=None,
uuid=uuids.group)
self.compute._validate_instance_group_policy(self.context,
instance, hints)
mock_get.assert_called_once_with(self.context, 'foo')
@@ -7521,10 +7552,14 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
instance, hints)
mock_get.assert_called_once_with(self.context, uuids.group_hint)
@mock.patch('nova.objects.InstanceGroup.get_by_uuid')
@mock.patch('nova.objects.InstanceList.get_uuids_by_host')
@mock.patch('nova.objects.InstanceGroup.get_by_hint')
def test_validate_instance_group_policy_with_rules(self, mock_get_by_hint,
mock_get_by_host):
@mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes')
@mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
def test_validate_instance_group_policy_with_rules(
self, migration_list, nodes, mock_get_by_hint, mock_get_by_host,
mock_get_by_uuid):
# Create 2 instance in same host, inst2 created before inst1
instance = objects.Instance(uuid=uuids.inst1)
hints = {'group': [uuids.group_hint]}
@@ -7533,17 +7568,26 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_get_by_host.return_value = existing_insts
# if group policy rules limit to 1, raise RescheduledException
mock_get_by_hint.return_value = objects.InstanceGroup(
group = objects.InstanceGroup(
policy='anti-affinity', rules={'max_server_per_host': '1'},
hosts=['host1'], members=members_uuids)
hosts=['host1'], members=members_uuids,
uuid=uuids.group)
mock_get_by_hint.return_value = group
mock_get_by_uuid.return_value = group
nodes.return_value = ['nodename']
migration_list.return_value = [objects.Migration(
uuid=uuids.migration, instance_uuid=uuids.instance)]
self.assertRaises(exception.RescheduledException,
self.compute._validate_instance_group_policy,
self.context, instance, hints)
# if group policy rules limit change to 2, validate OK
mock_get_by_hint.return_value = objects.InstanceGroup(
group2 = objects.InstanceGroup(
policy='anti-affinity', rules={'max_server_per_host': 2},
hosts=['host1'], members=members_uuids)
hosts=['host1'], members=members_uuids,
uuid=uuids.group)
mock_get_by_hint.return_value = group2
mock_get_by_uuid.return_value = group2
self.compute._validate_instance_group_policy(self.context,
instance, hints)
@@ -9072,6 +9116,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
manager.ComputeManager()
mock_executor.assert_called_once_with()
@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_pre_live_migration_cinder_v3_api(self):
# This tests that pre_live_migration with a bdm with an
# attachment_id, will create a new attachment and update
@@ -9149,6 +9195,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
_test()
@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_pre_live_migration_exception_cinder_v3_api(self):
# The instance in this test has 2 attachments. The second attach_create
# will throw an exception. This will test that the first attachment
@@ -9218,6 +9266,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
self.assertGreater(len(m.mock_calls), 0)
_test()
@mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
side_effect=exception.InstanceGroupNotFound(group_uuid='')))
def test_pre_live_migration_exceptions_delete_attachments(self):
# The instance in this test has 2 attachments. The call to
# driver.pre_live_migration will raise an exception. This will test
@@ -10596,6 +10646,54 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
# (_error_out_instance_on_exception will set to ACTIVE by default).
self.assertEqual(vm_states.STOPPED, instance.vm_state)
@mock.patch('nova.compute.utils.notify_usage_exists')
@mock.patch('nova.compute.manager.ComputeManager.'
'_notify_about_instance_usage')
@mock.patch('nova.compute.utils.notify_about_resize_prep_instance')
@mock.patch('nova.objects.Instance.save')
@mock.patch('nova.compute.manager.ComputeManager._revert_allocation')
@mock.patch('nova.compute.manager.ComputeManager.'
'_reschedule_resize_or_reraise')
@mock.patch('nova.compute.utils.add_instance_fault_from_exc')
# this is almost copy-paste from test_prep_resize_fails_rollback
def test_prep_resize_fails_group_validation(
self, add_instance_fault_from_exc, _reschedule_resize_or_reraise,
_revert_allocation, mock_instance_save,
notify_about_resize_prep_instance, _notify_about_instance_usage,
notify_usage_exists):
"""Tests that if _validate_instance_group_policy raises
InstanceFaultRollback, the instance.vm_state is reset properly in
_error_out_instance_on_exception
"""
instance = fake_instance.fake_instance_obj(
self.context, host=self.compute.host, vm_state=vm_states.STOPPED,
node='fake-node', expected_attrs=['system_metadata', 'flavor'])
migration = mock.MagicMock(spec='nova.objects.Migration')
request_spec = mock.MagicMock(spec='nova.objects.RequestSpec')
ex = exception.RescheduledException(
instance_uuid=instance.uuid, reason="policy violated")
ex2 = exception.InstanceFaultRollback(
inner_exception=ex)
def fake_reschedule_resize_or_reraise(*args, **kwargs):
raise ex2
_reschedule_resize_or_reraise.side_effect = (
fake_reschedule_resize_or_reraise)
with mock.patch.object(self.compute, '_validate_instance_group_policy',
side_effect=ex):
self.assertRaises(
# _error_out_instance_on_exception should reraise the
# RescheduledException inside InstanceFaultRollback.
exception.RescheduledException, self.compute.prep_resize,
self.context, instance.image_meta, instance, instance.flavor,
request_spec, filter_properties={}, node=instance.node,
clean_shutdown=True, migration=migration, host_list=[])
# The instance.vm_state should remain unchanged
# (_error_out_instance_on_exception will set to ACTIVE by default).
self.assertEqual(vm_states.STOPPED, instance.vm_state)
@mock.patch('nova.compute.rpcapi.ComputeAPI.resize_instance')
@mock.patch('nova.compute.resource_tracker.ResourceTracker.resize_claim')
@mock.patch('nova.objects.Instance.save')

View File

@@ -0,0 +1,11 @@
---
fixes:
- |
Improved detection of anti-affinity policy violation when performing live
and cold migrations. Most of the violations caused by race conditions due
to performing concurrent live or cold migrations should now be addressed
by extra checks in the compute service. Upon detection, cold migration
operations are automatically rescheduled, while live migrations have two
checks and will be rescheduled if detected by the first one, otherwise the
live migration will fail cleanly and revert the instance state back to its
previous value.