Make scheduler.utils.setup_instance_group query all cells
To check affinity and anti-affinity policies for scheduling instances, we use the RequestSpec.instance_group.hosts field to check the hosts that have group members on them. Access of the 'hosts' field calls InstanceGroup.get_hosts during a lazy-load and get_hosts does a query for all instances that are members of the group and returns their hosts after removing duplicates. The InstanceList query isn't targeting any cells, so it will return [] in a multi-cell environment in both the instance create case and the instance move case. In the move case, we do have a cell-targeted RequestContext when setup_instance_group is called *but* the RequestSpec.instance_group object is queried early in compute/api before we're targeted to a cell, so a call of RequestSpec.instance_group.get_hosts() will result in [] still, even for move operations. This makes setup_instance_group query all cells for instances that are members of the instance group if the RequestContext is untargeted, else it queries the targeted cell for the instances. Closes-Bug: #1746863 Change-Id: Ia5f5a0d75953b1154a8de3e1eaa15f8042e32d77
This commit is contained in:
parent
ac6e51c10d
commit
14f4c502f9
@ -15,6 +15,7 @@
|
||||
import copy
|
||||
|
||||
from oslo_db import exception as db_exc
|
||||
from oslo_log import log as logging
|
||||
from oslo_serialization import jsonutils
|
||||
from oslo_utils import uuidutils
|
||||
from oslo_utils import versionutils
|
||||
@ -31,6 +32,7 @@ from nova.objects import fields
|
||||
|
||||
|
||||
LAZY_LOAD_FIELDS = ['hosts']
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _instance_group_get_query(context, id_field=None, id=None):
|
||||
@ -341,6 +343,12 @@ class InstanceGroup(base.NovaPersistentObject, base.NovaObject,
|
||||
raise exception.ObjectActionError(
|
||||
action='obj_load_attr', reason='unable to load %s' % attrname)
|
||||
|
||||
LOG.debug("Lazy-loading '%(attr)s' on %(name)s uuid %(uuid)s",
|
||||
{'attr': attrname,
|
||||
'name': self.obj_name(),
|
||||
'uuid': self.uuid,
|
||||
})
|
||||
|
||||
self.hosts = self.get_hosts()
|
||||
self.obj_reset_changes(['hosts'])
|
||||
|
||||
|
@ -28,6 +28,7 @@ from nova.api.openstack.placement import lib as placement_lib
|
||||
from nova.compute import flavors
|
||||
from nova.compute import utils as compute_utils
|
||||
import nova.conf
|
||||
from nova import context as nova_context
|
||||
from nova import exception
|
||||
from nova.i18n import _
|
||||
from nova import objects
|
||||
@ -813,12 +814,44 @@ def _get_group_details(context, instance_uuid, user_group_hosts=None):
|
||||
msg = _("ServerGroupSoftAntiAffinityWeigher not configured")
|
||||
LOG.error(msg)
|
||||
raise exception.UnsupportedPolicyException(reason=msg)
|
||||
# NOTE(melwitt): If the context is already targeted to a cell (during a
|
||||
# move operation), we don't need to scatter-gather.
|
||||
if context.db_connection:
|
||||
# We don't need to target the group object's context because it was
|
||||
# retrieved with the targeted context earlier in this method.
|
||||
group_hosts = set(group.get_hosts())
|
||||
else:
|
||||
group_hosts = set(_get_instance_group_hosts_all_cells(context,
|
||||
group))
|
||||
user_hosts = set(user_group_hosts) if user_group_hosts else set()
|
||||
return GroupDetails(hosts=user_hosts | group_hosts,
|
||||
policy=group.policy, members=group.members)
|
||||
|
||||
|
||||
def _get_instance_group_hosts_all_cells(context, instance_group):
|
||||
def get_hosts_in_cell(cell_context):
|
||||
# NOTE(melwitt): The obj_alternate_context is going to mutate the
|
||||
# cell_instance_group._context and to do this in a scatter-gather
|
||||
# with multiple parallel greenthreads, we need the instance groups
|
||||
# to be separate object copies.
|
||||
cell_instance_group = instance_group.obj_clone()
|
||||
with cell_instance_group.obj_alternate_context(cell_context):
|
||||
return cell_instance_group.get_hosts()
|
||||
|
||||
results = nova_context.scatter_gather_skip_cell0(context,
|
||||
get_hosts_in_cell)
|
||||
hosts = []
|
||||
for result in results.values():
|
||||
# TODO(melwitt): We will need to handle scenarios where an exception
|
||||
# is raised while targeting a cell and when a cell does not respond
|
||||
# as part of the "handling of a down cell" spec:
|
||||
# https://blueprints.launchpad.net/nova/+spec/handling-down-cell
|
||||
if result not in (nova_context.did_not_respond_sentinel,
|
||||
nova_context.raised_exception_sentinel):
|
||||
hosts.extend(result)
|
||||
return hosts
|
||||
|
||||
|
||||
def setup_instance_group(context, request_spec):
|
||||
"""Add group_hosts and group_policies fields to filter_properties dict
|
||||
based on instance uuids provided in request_spec, if those instances are
|
||||
@ -826,11 +859,30 @@ def setup_instance_group(context, request_spec):
|
||||
|
||||
:param request_spec: Request spec
|
||||
"""
|
||||
# NOTE(melwitt): Proactively query for the instance group hosts instead of
|
||||
# relying on a lazy-load via the 'hosts' field of the InstanceGroup object.
|
||||
if (request_spec.instance_group and
|
||||
'hosts' not in request_spec.instance_group):
|
||||
group = request_spec.instance_group
|
||||
# If the context is already targeted to a cell (during a move
|
||||
# operation), we don't need to scatter-gather. We do need to use
|
||||
# obj_alternate_context here because the RequestSpec is queried at the
|
||||
# start of a move operation in compute/api, before the context has been
|
||||
# targeted.
|
||||
if context.db_connection:
|
||||
with group.obj_alternate_context(context):
|
||||
group.hosts = group.get_hosts()
|
||||
else:
|
||||
group.hosts = _get_instance_group_hosts_all_cells(context, group)
|
||||
|
||||
if request_spec.instance_group and request_spec.instance_group.hosts:
|
||||
group_hosts = request_spec.instance_group.hosts
|
||||
else:
|
||||
group_hosts = None
|
||||
instance_uuid = request_spec.instance_uuid
|
||||
# This queries the group details for the group where the instance is a
|
||||
# member. The group_hosts passed in are the hosts that contain members of
|
||||
# the requested instance group.
|
||||
group_info = _get_group_details(context, instance_uuid, group_hosts)
|
||||
if group_info is not None:
|
||||
request_spec.instance_group.hosts = list(group_info.hosts)
|
||||
|
@ -966,12 +966,7 @@ class ServerGroupTestMultiCell(ServerGroupTestBase):
|
||||
# cell1. So boot the server to cell2 where the group member cannot be
|
||||
# found as a result of the default setting.
|
||||
self._boot_a_server_to_group(created_group, az='cell2')
|
||||
# TODO(melwitt): Uncomment this when the bug is fixed.
|
||||
# self._boot_a_server_to_group(created_group, az='cell1',
|
||||
# expected_status='ERROR')
|
||||
# TODO(melwitt): Remove this when the bug is fixed.
|
||||
# Boot a server to cell1 with the affinity policy. This should fail
|
||||
# because a group member has landed in cell2 already. But it passes
|
||||
# because of the bug -- the query for group members doesn't find any
|
||||
# members in cell2 because the query isn't multi-cell enabled.
|
||||
self._boot_a_server_to_group(created_group, az='cell1')
|
||||
# because group members found in cell2 should violate the policy.
|
||||
self._boot_a_server_to_group(created_group, az='cell1',
|
||||
expected_status='ERROR')
|
||||
|
@ -327,7 +327,8 @@ class _TestInstanceGroupObject(object):
|
||||
mock_get_by_filt.return_value = [objects.Instance(host='host1'),
|
||||
objects.Instance(host='host2')]
|
||||
|
||||
obj = objects.InstanceGroup(self.context, members=['uuid1'])
|
||||
obj = objects.InstanceGroup(self.context, members=['uuid1'],
|
||||
uuid=uuids.group)
|
||||
self.assertEqual(2, len(obj.hosts))
|
||||
self.assertIn('host1', obj.hosts)
|
||||
self.assertIn('host2', obj.hosts)
|
||||
|
@ -21,6 +21,7 @@ import six
|
||||
|
||||
from nova.compute import flavors
|
||||
from nova.compute import utils as compute_utils
|
||||
from nova import context as nova_context
|
||||
from nova import exception
|
||||
from nova import objects
|
||||
from nova.scheduler import utils as scheduler_utils
|
||||
@ -34,7 +35,7 @@ class SchedulerUtilsTestCase(test.NoDBTestCase):
|
||||
"""Test case for scheduler utils methods."""
|
||||
def setUp(self):
|
||||
super(SchedulerUtilsTestCase, self).setUp()
|
||||
self.context = 'fake-context'
|
||||
self.context = nova_context.get_context()
|
||||
|
||||
def test_build_request_spec_without_image(self):
|
||||
instance = {'uuid': uuids.instance}
|
||||
|
Loading…
Reference in New Issue
Block a user