ignore deleted server groups in validation

This change simply catches the exception raised when we look up a server group via a hint and the validation upcall is enabled. Change-Id: I858b4da35382a9f4dcf88f4b6db340e1f34eb82d Closes-Bug: #1890244
2022-06-21 12:23:45 +01:00 · 2022-06-21 12:23:45 +01:00 · cd2c2f359b
commit cd2c2f359b
parent 84a84f7f2f
6 changed files with 155 additions and 13 deletions
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@ -1717,27 +1717,32 @@ class ComputeManager(manager.Manager):
        # hosts.  This is a validation step to make sure that starting the
        # instance here doesn't violate the policy.
        if scheduler_hints is not None:
-            # only go through here if scheduler_hints is provided, even if it
+            # only go through here if scheduler_hints is provided,
-            # is empty.
+            # even if it is empty.
            group_hint = scheduler_hints.get('group')
            if not group_hint:
                return
            else:
-                # The RequestSpec stores scheduler_hints as key=list pairs so
+                # The RequestSpec stores scheduler_hints as key=list pairs
-                # we need to check the type on the value and pull the single
+                # so we need to check the type on the value and pull the
-                # entry out. The API request schema validates that
+                # single entry out. The API request schema validates that
                # the 'group' hint is a single value.
                if isinstance(group_hint, list):
                    group_hint = group_hint[0]
-
+                try:
-                group = objects.InstanceGroup.get_by_hint(context, group_hint)
+                    group = objects.InstanceGroup.get_by_hint(
                        context, group_hint
                    )
                except exception.InstanceGroupNotFound:
                    return
        else:
            # TODO(ganso): a call to DB can be saved by adding request_spec
            # to rpcapi payload of live_migration, pre_live_migration and
            # check_can_live_migrate_destination
            try:
                group = objects.InstanceGroup.get_by_instance_uuid(
-                    context, instance.uuid)
+                    context, instance.uuid
                )
            except exception.InstanceGroupNotFound:
                return
--- a/nova/objects/request_spec.py
+++ b/nova/objects/request_spec.py
@ -645,6 +645,7 @@ class RequestSpec(base.NovaObject):
            except exception.InstanceGroupNotFound:
                # NOTE(danms): Instance group may have been deleted
                spec.instance_group = None
                spec.scheduler_hints.pop('group', None)
        if data_migrated:
            spec.save()
--- a/nova/tests/functional/regressions/test_bug_1890244.py
+++ b/nova/tests/functional/regressions/test_bug_1890244.py
@ -0,0 +1,96 @@
 # Copyright 2017 Ericsson
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from nova import context
 from nova import objects
 from nova import test
 from nova.tests import fixtures as nova_fixtures
 from nova.tests.functional import fixtures as func_fixtures
 from nova.tests.functional import integrated_helpers
 class IgnoreDeletedServerGroupsTest(
    test.TestCase, integrated_helpers.InstanceHelperMixin,
 ):
    """Regression test for bug 1890244.

    If instances are created as members of a server group, it should
    still be possible to evacuate them when the server group is deleted
    prior to the host failure.
    """
    def setUp(self):
        """Start the API, conductor, scheduler and two compute services."""
        super().setUp()
        # Stub out external dependencies.
        self.useFixture(nova_fixtures.NeutronFixture(self))
        self.useFixture(nova_fixtures.GlanceFixture(self))
        self.useFixture(func_fixtures.PlacementFixture())
        # Start nova controller services.
        api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
            api_version='v2.1'))
        self.api = api_fixture.admin_api
        self.start_service('conductor')
        # Use a custom weigher to make sure that we have a predictable
        # scheduling sort order.
        self.useFixture(nova_fixtures.HostNameWeigherFixture())
        self.start_service('scheduler')
        # Start two computes, one where the server will be created and another
        # where we'll evacuate it to.
        self.src = self._start_compute('host1')
        self.dest = self._start_compute('host2')
        self.notifier = self.useFixture(
            nova_fixtures.NotificationFixture(self)
        )
    def test_evacuate_after_group_delete(self):
        """Evacuate a server after its server group has been deleted.

        The server is booted into an anti-affinity group on host1, the
        source compute is forced down, the group is deleted, and the server
        is then evacuated to host2. Afterwards the request spec must no
        longer reference the group, either as an object or as a hint.
        """
        # Create an anti-affinity group for the server.
        body = {
            'server_group': {
                'name': 'test-group',
                'policies': ['anti-affinity']
            }
        }
        group_id = self.api.api_post(
            '/os-server-groups', body).body['server_group']['id']
        # Create a server in the group which should land on host1 due to our
        # custom weigher.
        body = {'server': self._build_server()}
        body['os:scheduler_hints'] = {'group': group_id}
        server = self.api.post_server(body)
        server = self._wait_for_state_change(server, 'ACTIVE')
        self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])
        # Down the source compute to enable the evacuation
        self.api.microversion = '2.11'     # Cap for the force-down call.
        self.api.force_down_service('host1', 'nova-compute', True)
        self.api.microversion = 'latest'
        self.src.stop()
        # assert the server currently has a server group
        reqspec = objects.RequestSpec.get_by_instance_uuid(
            context.get_admin_context(), server['id'])
        self.assertIsNotNone(reqspec.instance_group)
        self.assertIn('group', reqspec.scheduler_hints)
        # then delete it so that we need to clean it up on evac
        self.api.api_delete(f'/os-server-groups/{group_id}')
        # Initiate evacuation
        server = self._evacuate_server(
            server, expected_host='host2', expected_migration_status='done'
        )
        # After the evacuation the reloaded request spec must have dropped
        # both the group object and the 'group' scheduler hint.
        reqspec = objects.RequestSpec.get_by_instance_uuid(
            context.get_admin_context(), server['id'])
        self.assertIsNone(reqspec.instance_group)
        self.assertNotIn('group', reqspec.scheduler_hints)
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@ -7601,11 +7601,14 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
        mock_get.side_effect = exception.InstanceGroupNotFound(
            group_uuid=uuids.group_hint
        )
-        # FIXME(sean-k-mooney): this should not leak the exception
+        # This implicitly asserts that no exception is raised since
-        self.assertRaises(
+        # uncaught exceptions would be treated as a test failure.
-            exception.InstanceGroupNotFound,
+        self.compute._validate_instance_group_policy(
-            self.compute._validate_instance_group_policy, self.context,
+            self.context, instance, hints
-            instance, hints)
+        )
        # This asserts that the mocked method which raises was actually
        # invoked, so that if the code is refactored in the future this
        # test will fail if the function we mock is no longer called.
        mock_get.assert_called_once_with(self.context, uuids.group_hint)
    @mock.patch('nova.objects.InstanceGroup.get_by_uuid')
--- a/nova/tests/unit/objects/test_request_spec.py
+++ b/nova/tests/unit/objects/test_request_spec.py
@ -621,6 +621,30 @@ class _TestRequestSpecObject(object):
        self.assertIsInstance(req_obj.instance_group, objects.InstanceGroup)
        self.assertEqual('fresh', req_obj.instance_group.name)
    @mock.patch.object(
        request_spec.RequestSpec, '_get_by_instance_uuid_from_db'
    )
    @mock.patch('nova.objects.InstanceGroup.get_by_uuid')
    def test_get_by_instance_uuid_deleted_group(
            self, mock_get_ig, get_by_uuid
    ):
        """Loading a spec whose instance group was deleted drops the group.

        The stored request spec carries a 'group' scheduler hint, but the
        instance group lookup raises InstanceGroupNotFound. The loaded
        RequestSpec must then have no instance_group and no 'group' hint.
        """
        # NOTE: mock decorators are applied bottom-up, so ``mock_get_ig``
        # patches InstanceGroup.get_by_uuid while ``get_by_uuid`` patches
        # RequestSpec._get_by_instance_uuid_from_db.
        fake_spec_obj = fake_request_spec.fake_spec_obj()
        fake_spec_obj.scheduler_hints['group'] = ['fresh']
        fake_spec = fake_request_spec.fake_db_spec(fake_spec_obj)
        get_by_uuid.return_value = fake_spec
        # Simulate the group having been deleted after the spec was saved.
        mock_get_ig.side_effect = exception.InstanceGroupNotFound(
            group_uuid=uuids.instgroup
        )
        req_obj = request_spec.RequestSpec.get_by_instance_uuid(
            self.context, fake_spec['instance_uuid']
        )
        # assert that both the instance_group object and scheduler hint
        # are cleared if the instance_group was deleted since the request
        # spec was last saved to the db.
        # assertIsNone takes (obj, msg=None); the original passed
        # objects.InstanceGroup as the message, which served no purpose.
        self.assertIsNone(req_obj.instance_group)
        # Only the non-group hint is expected to remain (presumably the
        # default hint provided by fake_spec_obj).
        self.assertEqual({'hint': ['over-there']}, req_obj.scheduler_hints)
    @mock.patch('nova.objects.request_spec.RequestSpec.save')
    @mock.patch.object(
        request_spec.RequestSpec, '_get_by_instance_uuid_from_db')
--- a/releasenotes/notes/fix-group-policy-validation-with-deleted-groups-4f685fd1d6b84192.yaml
+++ b/releasenotes/notes/fix-group-policy-validation-with-deleted-groups-4f685fd1d6b84192.yaml
@ -0,0 +1,13 @@
 ---
 fixes:
  - |
    When the server group policy validation upcall is enabled
    nova will assert that the policy is not violated on move operations
    and initial instance creation. As noted in `bug 1890244`_, if a
    server was created in a server group and that group was later deleted,
    the validation upcall would fail due to an uncaught exception.
    This prevented evacuate and other move operations from functioning.
    This has now been fixed and nova will ignore deleted server groups.
    .. _bug 1890244: https://bugs.launchpad.net/nova/+bug/1890244