Add functional test for anti-affinity cross-cell migration

This adds a functional test to ensure that the scheduler properly
restricts anti-affinity group members to different hosts regardless
of which cell those hosts are in. The comments in the
setup_instance_group scheduler utility method assume that all moves
are within the same cell, and therefore that the instance group hosts
will all be within the same cell, which is not true in the case of an
anti-affinity group cross-cell move. The test added here would
actually fail were it not for change
I4b67ec9dd4ce846a704d0f75ad64c41e693de0fb in Rocky to the
ServerGroupAntiAffinityFilter, which made the filter look only at the
group members rather than at request_spec.instance_group.hosts. A
clarifying comment is added to the scheduler utils code in case we
ever need to fix that.
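
For context, the post-I4b67ec9 filter logic amounts to checking the
group's members against the instances already on the candidate host
rather than consulting group.hosts. Below is a minimal illustrative
sketch of that idea, not the actual nova filter code; the bare
host_passes signature and the single-string group.policy attribute are
simplifying assumptions for the example:

def host_passes(host_state, spec_obj):
    # Illustrative sketch only; the real logic lives in
    # ServerGroupAntiAffinityFilter under nova/scheduler/filters.
    group = spec_obj.instance_group
    if not group or getattr(group, 'policy', None) != 'anti-affinity':
        # Not an anti-affinity request; nothing to enforce.
        return True
    # host_state.instances maps instance UUIDs to the instances on this
    # host. Checking members (not group.hosts) means a stale or
    # cell-local hosts list cannot let two group members land together.
    members_on_host = set(group.members) & set(host_state.instances)
    return not members_on_host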

Part of blueprint cross-cell-resize

Change-Id: I29f0dfdc52fda6d0b7c378801ee40ec7ad8fdf40
Author: Matt Riedemann
Date: 2019-05-28 18:58:13 -04:00
Parent: 92d4362f8e
Commit: fbebfcaf34
2 changed files with 93 additions and 18 deletions

nova/scheduler/utils.py

@@ -1094,6 +1094,10 @@ def setup_instance_group(context, request_spec):
         # obj_alternate_context here because the RequestSpec is queried at the
         # start of a move operation in compute/api, before the context has been
         # targeted.
+        # NOTE(mriedem): If doing a cross-cell move and the group policy
+        # is anti-affinity, this could be wrong since there could be
+        # instances in the group on other hosts in other cells. However,
+        # ServerGroupAntiAffinityFilter does not look at group.hosts.
         if context.db_connection:
             with group.obj_alternate_context(context):
                 group.hosts = group.get_hosts()
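
The test below verifies the group hosts via a scheduler utils helper
that does look across cells. As a rough sketch of what such cross-cell
host gathering can look like, assuming the standard CellMappingList and
target_cell helpers (the function name and loop here are illustrative,
not the actual nova helper):

from nova import context as nova_context
from nova import objects

def get_group_hosts_all_cells(ctxt, group):
    # Illustrative sketch: collect the group's instance hosts from
    # every cell database, not just the one the context is targeted at.
    hosts = set()
    for cell in objects.CellMappingList.get_all(ctxt):
        if cell.uuid == objects.CellMapping.CELL0_UUID:
            continue  # cell0 only holds instances that failed to schedule
        with nova_context.target_cell(ctxt, cell) as cctxt:
            with group.obj_alternate_context(cctxt):
                hosts.update(group.get_hosts())
    return sorted(hosts)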

nova/tests/functional/test_cross_cell_migrate.py

@@ -20,6 +20,7 @@ from nova import context as nova_context
 from nova.db import api as db_api
 from nova import exception
 from nova import objects
+from nova.scheduler import utils as scheduler_utils
 from nova.scheduler import weights
 from nova.tests import fixtures as nova_fixtures
 from nova.tests.functional import integrated_helpers
@@ -124,20 +125,28 @@ class TestMultiCellMigrate(integrated_helpers.ProviderUsageBaseTestCase):
         limits = self.api.get_limits()['absolute']
         self.assertEqual(expected_num_instances, limits['totalInstancesUsed'])
 
-    def _create_server(self, flavor, volume_backed=False):
+    def _create_server(self, flavor, volume_backed=False, group_id=None,
+                       no_networking=False):
         """Creates a server and waits for it to be ACTIVE
 
         :param flavor: dict form of the flavor to use
         :param volume_backed: True if the server should be volume-backed
+        :param group_id: UUID of a server group in which to create the server
+        :param no_networking: True if the server should be created without
+            networking, otherwise it will be created with a specific port and
+            VIF tag
         :returns: server dict response from the GET /servers/{server_id} API
         """
-        # Provide a VIF tag for the pre-existing port. Since VIF tags are
-        # stored in the virtual_interfaces table in the cell DB, we want to
-        # make sure those survive the resize to another cell.
-        networks = [{
-            'port': self.neutron.port_1['id'],
-            'tag': 'private'
-        }]
+        if no_networking:
+            networks = 'none'
+        else:
+            # Provide a VIF tag for the pre-existing port. Since VIF tags are
+            # stored in the virtual_interfaces table in the cell DB, we want
+            # to make sure those survive the resize to another cell.
+            networks = [{
+                'port': self.neutron.port_1['id'],
+                'tag': 'private'
+            }]
         image_uuid = fake_image.get_valid_image_id()
         server = self._build_minimal_create_server_request(
             'test_cross_cell_resize',
@@ -158,7 +167,10 @@ class TestMultiCellMigrate(integrated_helpers.ProviderUsageBaseTestCase):
             # We don't need the imageRef for volume-backed servers.
             server.pop('imageRef', None)
-        server = self.api.post_server({'server': server})
+        req = dict(server=server)
+        if group_id:
+            req['os:scheduler_hints'] = {'group': group_id}
+        server = self.api.post_server(req)
         server = self._wait_for_state_change(server, 'ACTIVE')
         # For volume-backed make sure there is one attachment to start.
         if volume_backed:
@@ -865,15 +877,74 @@ class TestMultiCellMigrate(integrated_helpers.ProviderUsageBaseTestCase):
     # resize_claim and a subsequent alternative host works, and also the
     # case that all hosts fail the resize_claim.
 
-    # TODO(mriedem): Test cross-cell anti-affinity group assumptions from
-    # scheduler utils setup_instance_group where it assumes moves are within
-    # the same cell, so:
-    # 0. create 2 hosts in cell1 and 1 host in cell2
-    # 1. create two servers in an anti-affinity group in cell1
-    # 2. migrate one server to cell2
-    # 3. migrate the other server to cell2 - this should fail during scheduling
-    #    because there is already a server from the anti-affinity group on the
-    #    host in cell2 but setup_instance_group code may not catch it.
+    def test_anti_affinity_group(self):
+        """Tests an anti-affinity group scenario where a server is moved
+        across cells; trying to move the other server from the same group
+        to the same host in the target cell should then be rejected by the
+        scheduler.
+        """
+        # Create an anti-affinity server group for our servers.
+        body = {
+            'server_group': {
+                'name': 'test_anti_affinity_group',
+                'policy': 'anti-affinity'
+            }
+        }
+        group_id = self.api.api_post(
+            '/os-server-groups', body).body['server_group']['id']
+        # Create a server in the group in cell1 (should land on host1 due
+        # to HostNameWeigher).
+        flavor = self.api.get_flavors()[0]
+        server1 = self._create_server(
+            flavor, group_id=group_id, no_networking=True)
+        # Start another compute host service in cell1.
+        self._start_compute(
+            'host3', cell_name=self.host_to_cell_mappings['host1'])
+        # Create another server but we want it on host3 in cell1. We cannot
+        # use the az forced host parameter because then we will not be able
+        # to move the server across cells later. The HostNameWeigher will
+        # prefer host2 in cell2 so we need to temporarily force host2 down.
+        host2_service_uuid = self.computes['host2'].service_ref.uuid
+        self.admin_api.put_service_force_down(
+            host2_service_uuid, forced_down=True)
+        server2 = self._create_server(
+            flavor, group_id=group_id, no_networking=True)
+        self.assertEqual('host3', server2['OS-EXT-SRV-ATTR:host'])
+        # Remove the forced-down status of the host2 compute service so we
+        # can migrate there.
+        self.admin_api.put_service_force_down(
+            host2_service_uuid, forced_down=False)
+        # Now migrate server1, which should move it to host2 in cell2;
+        # otherwise it would violate the anti-affinity policy since server2
+        # is on host3 in cell1.
+        self.admin_api.post_server_action(server1['id'], {'migrate': None})
+        server1 = self._wait_for_state_change(server1, 'VERIFY_RESIZE')
+        self.assertEqual('host2', server1['OS-EXT-SRV-ATTR:host'])
+        self.admin_api.post_server_action(
+            server1['id'], {'confirmResize': None})
+        self._wait_for_state_change(server1, 'ACTIVE')
+        # At this point we have:
+        # server1: host2 in cell2
+        # server2: host3 in cell1
+        # The server group hosts should reflect that.
+        ctxt = nova_context.get_admin_context()
+        group = objects.InstanceGroup.get_by_uuid(ctxt, group_id)
+        group_hosts = scheduler_utils._get_instance_group_hosts_all_cells(
+            ctxt, group)
+        self.assertEqual(['host2', 'host3'], sorted(group_hosts))
+        # Try to migrate server2 to host2 in cell2, which should fail
+        # scheduling because it violates the anti-affinity policy. Note
+        # that without change I4b67ec9dd4ce846a704d0f75ad64c41e693de0fb in
+        # ServerGroupAntiAffinityFilter this test would fail because the
+        # scheduler utils setup_instance_group only looks at the group
+        # hosts in the source cell.
+        self.admin_api.post_server_action(
+            server2['id'], {'migrate': {'host': 'host2'}})
+        self._wait_for_migration_status(server2, ['error'])
 
     # TODO(mriedem): Perform a resize with at-capacity computes, meaning that
     # when we revert we can only fit the instance with the old flavor back