Error out on anti-affinity violation on migrations

Error out the migrations (cold and live) whenever the anti-affinity
policy is violated. This addresses violations when multiple concurrent
migrations are requested.

Added detection on:
- prep_resize
- check_can_live_migrate_destination
- pre_live_migration

The improved method of detection now locks based on group_id and
considers other migrations in progress as well.

Closes-bug: #1821755
Change-Id: I32e6214568bb57f7613ddeba2c2c46da0320fabc
parent da57eebc9e · commit 33c8af1f8c
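The core of the fix, as the message above describes, is to serialize the policy check per server group and to count not only the instances already on the host but also those with in-progress migrations targeting it. The following standalone sketch illustrates that lock-plus-counting pattern; it is plain Python, not Nova code, and the helper name `violates_anti_affinity`, the lock table, and the `max_server_per_host` default are illustrative assumptions:

```python
import threading
from collections import defaultdict

# One lock per group UUID, so concurrent checks for the same
# anti-affinity group on this host are serialized.
_group_locks = defaultdict(threading.Lock)


def violates_anti_affinity(group_uuid, group_members, instance_uuid,
                           instances_on_host, incoming_migrations,
                           max_server_per_host=1):
    """Return True if placing instance_uuid here would break the policy.

    instances_on_host: UUIDs already running on this host.
    incoming_migrations: UUIDs of instances with in-progress migrations
    targeting this host (the part the fix adds to the check).
    """
    with _group_locks[group_uuid]:
        total = set(instances_on_host) | set(incoming_migrations)
        # Other group members already on (or headed to) this host.
        members_on_host = (total & set(group_members)) - {instance_uuid}
        return len(members_on_host) >= max_server_per_host


# Example: one member already runs on the host and another is migrating in,
# so a third member is rejected when max_server_per_host is 2.
print(violates_anti_affinity(
    group_uuid='group-1',
    group_members={'a', 'b', 'c'},
    instance_uuid='c',
    instances_on_host={'a'},
    incoming_migrations={'b'},
    max_server_per_host=2))  # True
```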
@@ -1610,7 +1610,11 @@ class ComputeManager(manager.Manager):
         return [_decode(f) for f in injected_files]
 
     def _validate_instance_group_policy(self, context, instance,
-                                        scheduler_hints):
+                                        scheduler_hints=None):
+
+        if CONF.workarounds.disable_group_policy_check_upcall:
+            return
+
         # NOTE(russellb) Instance group policy is enforced by the scheduler.
         # However, there is a race condition with the enforcement of
         # the policy. Since more than one instance may be scheduled at the
@@ -1619,29 +1623,63 @@ class ComputeManager(manager.Manager):
         # multiple instances with an affinity policy could end up on different
         # hosts. This is a validation step to make sure that starting the
         # instance here doesn't violate the policy.
-        group_hint = scheduler_hints.get('group')
-        if not group_hint:
-            return
-
-        # The RequestSpec stores scheduler_hints as key=list pairs so we need
-        # to check the type on the value and pull the single entry out. The
-        # API request schema validates that the 'group' hint is a single value.
-        if isinstance(group_hint, list):
-            group_hint = group_hint[0]
-
-        @utils.synchronized(group_hint)
-        def _do_validation(context, instance, group_hint):
-            group = objects.InstanceGroup.get_by_hint(context, group_hint)
+        if scheduler_hints is not None:
+            # only go through here if scheduler_hints is provided, even if it
+            # is empty.
+            group_hint = scheduler_hints.get('group')
+            if not group_hint:
+                return
+            else:
+                # The RequestSpec stores scheduler_hints as key=list pairs so
+                # we need to check the type on the value and pull the single
+                # entry out. The API request schema validates that
+                # the 'group' hint is a single value.
+                if isinstance(group_hint, list):
+                    group_hint = group_hint[0]
+
+                group = objects.InstanceGroup.get_by_hint(context, group_hint)
+        else:
+            # TODO(ganso): a call to DB can be saved by adding request_spec
+            # to rpcapi payload of live_migration, pre_live_migration and
+            # check_can_live_migrate_destination
+            try:
+                group = objects.InstanceGroup.get_by_instance_uuid(
+                    context, instance.uuid)
+            except exception.InstanceGroupNotFound:
+                return
+
+        @utils.synchronized(group['uuid'])
+        def _do_validation(context, instance, group):
             if group.policy and 'anti-affinity' == group.policy:
+
+                # instances on host
                 instances_uuids = objects.InstanceList.get_uuids_by_host(
                     context, self.host)
                 ins_on_host = set(instances_uuids)
+
+                # instance param is just for logging, the nodename obtained is
+                # not actually related to the instance at all
+                nodename = self._get_nodename(instance)
+
+                # instances being migrated to host
+                migrations = (
+                    objects.MigrationList.get_in_progress_by_host_and_node(
+                        context, self.host, nodename))
+                migration_vm_uuids = set([mig['instance_uuid']
+                                          for mig in migrations])
+
+                total_instances = migration_vm_uuids | ins_on_host
+
+                # refresh group to get updated members within locked block
+                group = objects.InstanceGroup.get_by_uuid(context,
+                                                          group['uuid'])
                 members = set(group.members)
                 # Determine the set of instance group members on this host
                 # which are not the instance in question. This is used to
                 # determine how many other members from the same anti-affinity
                 # group can be on this host.
-                members_on_host = ins_on_host & members - set([instance.uuid])
+                members_on_host = (total_instances & members -
+                                   set([instance.uuid]))
                 rules = group.rules
                 if rules and 'max_server_per_host' in rules:
                     max_server = rules['max_server_per_host']
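To make the new counting concrete, here is a worked example of the set arithmetic in the `_do_validation` block above, using placeholder UUID strings and a plain integer for `max_server_per_host` (the real value comes from `group.rules`); it shows a violation that the old host-only check misses but the new check, which folds in in-progress migrations, catches:

```python
# Placeholder values, for illustration only.
ins_on_host = {'member-1'}             # already running on this host
migration_vm_uuids = {'member-2'}      # migrating to this host right now
members = {'member-1', 'member-2', 'member-3'}
instance_uuid = 'member-3'             # the instance being validated here

# Old check: only instances already on the host are counted.
old_members_on_host = ins_on_host & members - {instance_uuid}

# New check: in-progress migrations are counted too.
total_instances = migration_vm_uuids | ins_on_host
new_members_on_host = total_instances & members - {instance_uuid}

max_server_per_host = 2
print(len(old_members_on_host) >= max_server_per_host)  # False: violation missed
print(len(new_members_on_host) >= max_server_per_host)  # True: violation caught
```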
@@ -1653,6 +1691,12 @@ class ComputeManager(manager.Manager):
                         raise exception.RescheduledException(
                             instance_uuid=instance.uuid,
                             reason=msg)
+
+            # NOTE(ganso): The check for affinity below does not work and it
+            # can easily be violated because the lock happens in different
+            # compute hosts.
+            # The only fix seems to be a DB lock to perform the check whenever
+            # setting the host field to an instance.
             elif group.policy and 'affinity' == group.policy:
                 group_hosts = group.get_hosts(exclude=[instance.uuid])
                 if group_hosts and self.host not in group_hosts:
@@ -1661,8 +1705,7 @@ class ComputeManager(manager.Manager):
                         instance_uuid=instance.uuid,
                         reason=msg)
 
-        if not CONF.workarounds.disable_group_policy_check_upcall:
-            _do_validation(context, instance, group_hint)
+        _do_validation(context, instance, group)
 
     def _log_original_error(self, exc_info, instance_uuid):
         LOG.error('Error: %s', exc_info[1], instance_uuid=instance_uuid,
@@ -5174,10 +5217,24 @@ class ComputeManager(manager.Manager):
         with self._error_out_instance_on_exception(
                 context, instance, instance_state=instance_state),\
              errors_out_migration_ctxt(migration):
             self._send_prep_resize_notifications(
                 context, instance, fields.NotificationPhase.START,
                 flavor)
             try:
+                scheduler_hints = self._get_scheduler_hints(filter_properties,
+                                                            request_spec)
+
+                # Error out if this host cannot accept the new instance due
+                # to anti-affinity. At this point the migration is already
+                # in-progress, so this is the definitive moment to abort due to
+                # the policy violation. Also, exploding here is covered by the
+                # cleanup methods in except block.
+                try:
+                    self._validate_instance_group_policy(context, instance,
+                                                         scheduler_hints)
+                except exception.RescheduledException as e:
+                    raise exception.InstanceFaultRollback(inner_exception=e)
+
                 self._prep_resize(context, image, instance,
                                   flavor, filter_properties,
                                   node, migration, request_spec,
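The wrap in `InstanceFaultRollback` above is what lets the surrounding error handling restore the original vm_state instead of erroring the instance out, which is what the new `test_prep_resize_fails_group_validation` test later in this diff asserts. Below is a simplified, self-contained sketch of that wrap-and-restore pattern; it is not Nova's `_error_out_instance_on_exception`, and the `Rollback` class and `error_out_instance` helper are illustrative names:

```python
import contextlib


class Rollback(Exception):
    """Carries the real error plus a signal to restore state, not error out."""
    def __init__(self, inner_exception):
        super().__init__(str(inner_exception))
        self.inner_exception = inner_exception


@contextlib.contextmanager
def error_out_instance(instance, keep_state):
    try:
        yield
    except Rollback as exc:
        instance['vm_state'] = keep_state   # restore instead of ERROR
        raise exc.inner_exception           # surface the original failure
    except Exception:
        instance['vm_state'] = 'error'
        raise


instance = {'vm_state': 'stopped'}
try:
    with error_out_instance(instance, keep_state='stopped'):
        # the policy check fails and is wrapped, as in prep_resize above
        raise Rollback(ValueError("anti-affinity policy violated"))
except ValueError as e:
    print(instance['vm_state'], '-', e)  # stopped - anti-affinity policy violated
```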
@@ -7909,6 +7966,20 @@ class ComputeManager(manager.Manager):
         :param limits: objects.SchedulerLimits object for this live migration.
         :returns: a LiveMigrateData object (hypervisor-dependent)
         """
+
+        # Error out if this host cannot accept the new instance due
+        # to anti-affinity. This check at this moment is not very accurate, as
+        # multiple requests may be happening concurrently and miss the lock,
+        # but when it works it provides a better user experience by failing
+        # earlier. Also, it should be safe to explode here, error becomes
+        # NoValidHost and instance status remains ACTIVE.
+        try:
+            self._validate_instance_group_policy(ctxt, instance)
+        except exception.RescheduledException as e:
+            msg = ("Failed to validate instance group policy "
+                   "due to: {}".format(e))
+            raise exception.MigrationPreCheckError(reason=msg)
+
         src_compute_info = obj_base.obj_to_primitive(
             self._get_compute_info(ctxt, instance.host))
         dst_compute_info = obj_base.obj_to_primitive(
@@ -8048,6 +8119,13 @@ class ComputeManager(manager.Manager):
         """
         LOG.debug('pre_live_migration data is %s', migrate_data)
 
+        # Error out if this host cannot accept the new instance due
+        # to anti-affinity. At this point the migration is already in-progress,
+        # so this is the definitive moment to abort due to the policy
+        # violation. Also, it should be safe to explode here. The instance
+        # status remains ACTIVE, migration status failed.
+        self._validate_instance_group_policy(context, instance)
+
         migrate_data.old_vol_attachment_ids = {}
         bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
             context, instance.uuid)
@@ -3389,12 +3389,16 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
             CONF.host, instance.uuid, graceful_exit=False)
         return result
 
+    @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+        side_effect=exception.InstanceGroupNotFound(group_uuid='')))
     def test_check_can_live_migrate_destination_success(self):
         self.useFixture(std_fixtures.MonkeyPatch(
             'nova.network.neutron.API.supports_port_binding_extension',
             lambda *args: True))
         self._test_check_can_live_migrate_destination()
 
+    @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+        side_effect=exception.InstanceGroupNotFound(group_uuid='')))
     def test_check_can_live_migrate_destination_fail(self):
         self.useFixture(std_fixtures.MonkeyPatch(
             'nova.network.neutron.API.supports_port_binding_extension',
@@ -3404,7 +3408,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
                           self._test_check_can_live_migrate_destination,
                           do_raise=True)
 
-    def test_check_can_live_migrate_destination_contins_vifs(self):
+    @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+        side_effect=exception.InstanceGroupNotFound(group_uuid='')))
+    def test_check_can_live_migrate_destination_contains_vifs(self):
         self.useFixture(std_fixtures.MonkeyPatch(
             'nova.network.neutron.API.supports_port_binding_extension',
             lambda *args: True))
@@ -3412,6 +3418,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         self.assertIn('vifs', migrate_data)
         self.assertIsNotNone(migrate_data.vifs)
 
+    @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+        side_effect=exception.InstanceGroupNotFound(group_uuid='')))
     def test_check_can_live_migrate_destination_no_binding_extended(self):
         self.useFixture(std_fixtures.MonkeyPatch(
             'nova.network.neutron.API.supports_port_binding_extension',
@@ -3419,18 +3427,40 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         migrate_data = self._test_check_can_live_migrate_destination()
         self.assertNotIn('vifs', migrate_data)
 
+    @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+        side_effect=exception.InstanceGroupNotFound(group_uuid='')))
     def test_check_can_live_migrate_destination_src_numa_lm_false(self):
         self.useFixture(std_fixtures.MonkeyPatch(
             'nova.network.neutron.API.supports_port_binding_extension',
             lambda *args: True))
         self._test_check_can_live_migrate_destination(src_numa_lm=False)
 
+    @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+        side_effect=exception.InstanceGroupNotFound(group_uuid='')))
     def test_check_can_live_migrate_destination_src_numa_lm_true(self):
         self.useFixture(std_fixtures.MonkeyPatch(
             'nova.network.neutron.API.supports_port_binding_extension',
             lambda *args: True))
         self._test_check_can_live_migrate_destination(src_numa_lm=True)
 
+    @mock.patch.object(compute_utils, 'add_instance_fault_from_exc')
+    def test_check_can_live_migrate_destination_fail_group_policy(
+            self, mock_fail_db):
+
+        instance = fake_instance.fake_instance_obj(
+            self.context, host=self.compute.host, vm_state=vm_states.ACTIVE,
+            node='fake-node')
+
+        ex = exception.RescheduledException(
+            instance_uuid=instance.uuid, reason="policy violated")
+
+        with mock.patch.object(self.compute, '_validate_instance_group_policy',
+                               side_effect=ex):
+            self.assertRaises(
+                exception.MigrationPreCheckError,
+                self.compute.check_can_live_migrate_destination,
+                self.context, instance, None, None, None, None)
+
     def test_dest_can_numa_live_migrate(self):
         positive_dest_check_data = objects.LibvirtLiveMigrateData(
             dst_supports_numa_live_migration=True)
@@ -7495,7 +7525,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
     def test_validate_policy_honors_workaround_disabled(self, mock_get):
         instance = objects.Instance(uuid=uuids.instance)
         hints = {'group': 'foo'}
-        mock_get.return_value = objects.InstanceGroup(policy=None)
+        mock_get.return_value = objects.InstanceGroup(policy=None,
+                                                      uuid=uuids.group)
         self.compute._validate_instance_group_policy(self.context,
                                                      instance, hints)
         mock_get.assert_called_once_with(self.context, 'foo')
@@ -7521,10 +7552,14 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
                                                      instance, hints)
         mock_get.assert_called_once_with(self.context, uuids.group_hint)
 
+    @mock.patch('nova.objects.InstanceGroup.get_by_uuid')
     @mock.patch('nova.objects.InstanceList.get_uuids_by_host')
     @mock.patch('nova.objects.InstanceGroup.get_by_hint')
-    def test_validate_instance_group_policy_with_rules(self, mock_get_by_hint,
-                                                       mock_get_by_host):
+    @mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    def test_validate_instance_group_policy_with_rules(
+            self, migration_list, nodes, mock_get_by_hint, mock_get_by_host,
+            mock_get_by_uuid):
         # Create 2 instance in same host, inst2 created before inst1
         instance = objects.Instance(uuid=uuids.inst1)
         hints = {'group': [uuids.group_hint]}
@@ -7533,17 +7568,26 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
         mock_get_by_host.return_value = existing_insts
 
         # if group policy rules limit to 1, raise RescheduledException
-        mock_get_by_hint.return_value = objects.InstanceGroup(
+        group = objects.InstanceGroup(
             policy='anti-affinity', rules={'max_server_per_host': '1'},
-            hosts=['host1'], members=members_uuids)
+            hosts=['host1'], members=members_uuids,
+            uuid=uuids.group)
+        mock_get_by_hint.return_value = group
+        mock_get_by_uuid.return_value = group
+        nodes.return_value = ['nodename']
+        migration_list.return_value = [objects.Migration(
+            uuid=uuids.migration, instance_uuid=uuids.instance)]
         self.assertRaises(exception.RescheduledException,
                           self.compute._validate_instance_group_policy,
                           self.context, instance, hints)
 
         # if group policy rules limit change to 2, validate OK
-        mock_get_by_hint.return_value = objects.InstanceGroup(
+        group2 = objects.InstanceGroup(
             policy='anti-affinity', rules={'max_server_per_host': 2},
-            hosts=['host1'], members=members_uuids)
+            hosts=['host1'], members=members_uuids,
+            uuid=uuids.group)
+        mock_get_by_hint.return_value = group2
+        mock_get_by_uuid.return_value = group2
         self.compute._validate_instance_group_policy(self.context,
                                                      instance, hints)
@@ -9072,6 +9116,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
             manager.ComputeManager()
         mock_executor.assert_called_once_with()
 
+    @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+        side_effect=exception.InstanceGroupNotFound(group_uuid='')))
     def test_pre_live_migration_cinder_v3_api(self):
         # This tests that pre_live_migration with a bdm with an
         # attachment_id, will create a new attachment and update
@@ -9149,6 +9195,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
 
         _test()
 
+    @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+        side_effect=exception.InstanceGroupNotFound(group_uuid='')))
     def test_pre_live_migration_exception_cinder_v3_api(self):
         # The instance in this test has 2 attachments. The second attach_create
         # will throw an exception. This will test that the first attachment
@@ -9218,6 +9266,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
                 self.assertGreater(len(m.mock_calls), 0)
         _test()
 
+    @mock.patch('nova.objects.InstanceGroup.get_by_instance_uuid', mock.Mock(
+        side_effect=exception.InstanceGroupNotFound(group_uuid='')))
     def test_pre_live_migration_exceptions_delete_attachments(self):
         # The instance in this test has 2 attachments. The call to
         # driver.pre_live_migration will raise an exception. This will test
@@ -10596,6 +10646,54 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
         # (_error_out_instance_on_exception will set to ACTIVE by default).
         self.assertEqual(vm_states.STOPPED, instance.vm_state)
 
+    @mock.patch('nova.compute.utils.notify_usage_exists')
+    @mock.patch('nova.compute.manager.ComputeManager.'
+                '_notify_about_instance_usage')
+    @mock.patch('nova.compute.utils.notify_about_resize_prep_instance')
+    @mock.patch('nova.objects.Instance.save')
+    @mock.patch('nova.compute.manager.ComputeManager._revert_allocation')
+    @mock.patch('nova.compute.manager.ComputeManager.'
+                '_reschedule_resize_or_reraise')
+    @mock.patch('nova.compute.utils.add_instance_fault_from_exc')
+    # this is almost copy-paste from test_prep_resize_fails_rollback
+    def test_prep_resize_fails_group_validation(
+            self, add_instance_fault_from_exc, _reschedule_resize_or_reraise,
+            _revert_allocation, mock_instance_save,
+            notify_about_resize_prep_instance, _notify_about_instance_usage,
+            notify_usage_exists):
+        """Tests that if _validate_instance_group_policy raises
+        InstanceFaultRollback, the instance.vm_state is reset properly in
+        _error_out_instance_on_exception
+        """
+        instance = fake_instance.fake_instance_obj(
+            self.context, host=self.compute.host, vm_state=vm_states.STOPPED,
+            node='fake-node', expected_attrs=['system_metadata', 'flavor'])
+        migration = mock.MagicMock(spec='nova.objects.Migration')
+        request_spec = mock.MagicMock(spec='nova.objects.RequestSpec')
+        ex = exception.RescheduledException(
+            instance_uuid=instance.uuid, reason="policy violated")
+        ex2 = exception.InstanceFaultRollback(
+            inner_exception=ex)
+
+        def fake_reschedule_resize_or_reraise(*args, **kwargs):
+            raise ex2
+
+        _reschedule_resize_or_reraise.side_effect = (
+            fake_reschedule_resize_or_reraise)
+
+        with mock.patch.object(self.compute, '_validate_instance_group_policy',
+                               side_effect=ex):
+            self.assertRaises(
+                # _error_out_instance_on_exception should reraise the
+                # RescheduledException inside InstanceFaultRollback.
+                exception.RescheduledException, self.compute.prep_resize,
+                self.context, instance.image_meta, instance, instance.flavor,
+                request_spec, filter_properties={}, node=instance.node,
+                clean_shutdown=True, migration=migration, host_list=[])
+            # The instance.vm_state should remain unchanged
+            # (_error_out_instance_on_exception will set to ACTIVE by default).
+            self.assertEqual(vm_states.STOPPED, instance.vm_state)
+
     @mock.patch('nova.compute.rpcapi.ComputeAPI.resize_instance')
     @mock.patch('nova.compute.resource_tracker.ResourceTracker.resize_claim')
     @mock.patch('nova.objects.Instance.save')
releasenotes/notes/bug-1821755-7bd03319e34b6b10.yaml (new file, 11 lines)
@@ -0,0 +1,11 @@
+---
+fixes:
+  - |
+    Improved detection of anti-affinity policy violation when performing live
+    and cold migrations. Most of the violations caused by race conditions due
+    to performing concurrent live or cold migrations should now be addressed
+    by extra checks in the compute service. Upon detection, cold migration
+    operations are automatically rescheduled, while live migrations have two
+    checks and will be rescheduled if detected by the first one, otherwise the
+    live migration will fail cleanly and revert the instance state back to its
+    previous value.