Add regression recreate test for bug 1830747

Before change I4244f7dd8fe74565180f73684678027067b4506e in Stein, when
a cold migration would reschedule to conductor it would not send the
RequestSpec, only the filter_properties. The filter_properties contain
a primitive version of the instance group information from the RequestSpec
for things like the group members, hosts and policies, but not the uuid.
When conductor is trying to reschedule the cold migration without a
RequestSpec, it builds a RequestSpec from the components it has, like the
filter_properties. This results in a RequestSpec with an instance_group
field set but with no uuid field in the RequestSpec.instance_group.
That RequestSpec gets persisted and then because of change
Ie70c77db753711e1449e99534d3b83669871943f, later attempts to load the
RequestSpec from the database will fail because of the missing
RequestSpec.instance_group.uuid.

The test added here recreates the pre-Stein scenario which could still
be a problem (on master) for any corrupted RequestSpecs for older
instances.

Change-Id: I05700c97f756edb7470be7273d5c9c3d76d63e29
Related-Bug: #1830747
This commit is contained in:
Matt Riedemann 2019-05-28 13:59:20 -04:00
parent 0793b4f641
commit c96c7c5e13
1 changed files with 158 additions and 0 deletions

View File

@ -0,0 +1,158 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import mock
import six
from nova.conductor import api as conductor_api
from nova import context as nova_context
from nova import exception
from nova import objects
from nova.scheduler import weights
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional import fixtures as func_fixtures
from nova.tests.functional import integrated_helpers
from nova.tests.unit.image import fake as fake_image
from nova.virt import fake as fake_virt
class HostNameWeigher(weights.BaseHostWeigher):
# Prefer host1 over host2.
weights = {'host1': 100, 'host2': 50}
def _weigh_object(self, host_state, weight_properties):
return self.weights.get(host_state.host, 0)
class MissingReqSpecInstanceGroupUUIDTestCase(
test.TestCase, integrated_helpers.InstanceHelperMixin):
"""Regression recreate test for bug 1830747
Before change I4244f7dd8fe74565180f73684678027067b4506e in Stein, when
a cold migration would reschedule to conductor it would not send the
RequestSpec, only the filter_properties. The filter_properties contain
a primitive version of the instance group information from the RequestSpec
for things like the group members, hosts and policies, but not the uuid.
When conductor is trying to reschedule the cold migration without a
RequestSpec, it builds a RequestSpec from the components it has, like the
filter_properties. This results in a RequestSpec with an instance_group
field set but with no uuid field in the RequestSpec.instance_group.
That RequestSpec gets persisted and then because of change
Ie70c77db753711e1449e99534d3b83669871943f, later attempts to load the
RequestSpec from the database will fail because of the missing
RequestSpec.instance_group.uuid.
This test recreates the regression scenario by cold migrating a server
to a host which fails and triggers a reschedule but without the RequestSpec
so a RequestSpec is created/updated for the instance without the
instance_group.uuid set which will lead to a failure loading the
RequestSpec from the DB later.
"""
def setUp(self):
super(MissingReqSpecInstanceGroupUUIDTestCase, self).setUp()
# Stub out external dependencies.
self.useFixture(nova_fixtures.NeutronFixture(self))
self.useFixture(func_fixtures.PlacementFixture())
fake_image.stub_out_image_service(self)
self.addCleanup(fake_image.FakeImageService_reset)
# Configure the API to allow resizing to the same host so we can keep
# the number of computes down to two in the test.
self.flags(allow_resize_to_same_host=True)
# Start nova controller services.
api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
api_version='v2.1'))
self.api = api_fixture.admin_api
self.start_service('conductor')
# Use our custom weigher defined above to make sure that we have
# a predictable scheduling sort order.
self.flags(weight_classes=[__name__ + '.HostNameWeigher'],
group='filter_scheduler')
self.start_service('scheduler')
# Start two computes, one where the server will be created and another
# where we'll cold migrate it.
self.addCleanup(fake_virt.restore_nodes)
self.computes = {} # keep track of the compute services per host name
for host in ('host1', 'host2'):
fake_virt.set_nodes([host])
compute_service = self.start_service('compute', host=host)
self.computes[host] = compute_service
def test_cold_migrate_reschedule(self):
# Create an anti-affinity group for the server.
body = {
'server_group': {
'name': 'test-group',
'policies': ['anti-affinity']
}
}
group_id = self.api.api_post(
'/os-server-groups', body).body['server_group']['id']
# Create a server in the group which should land on host1 due to our
# custom weigher.
server = self._build_minimal_create_server_request(
self.api, 'test_cold_migrate_reschedule')
body = dict(server=server)
body['os:scheduler_hints'] = {'group': group_id}
server = self.api.post_server(body)
server = self._wait_for_state_change(self.api, server, 'ACTIVE')
self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])
# Verify the group uuid is set in the request spec.
ctxt = nova_context.get_admin_context()
reqspec = objects.RequestSpec.get_by_instance_uuid(ctxt, server['id'])
self.assertEqual(group_id, reqspec.instance_group.uuid)
# Stub out the ComputeTaskAPI.resize_instance method to not pass the
# request spec from compute to conductor during a reschedule.
# This simulates the pre-Stein reschedule behavior.
original_resize_instance = conductor_api.ComputeTaskAPI.resize_instance
def stub_resize_instance(_self, context, instance,
extra_instance_updates, scheduler_hint,
*args, **kwargs):
# Only remove the request spec if we know we're rescheduling
# which we can determine from the filter_properties retry dict.
filter_properties = scheduler_hint['filter_properties']
if filter_properties.get('retry', {}).get('exc'):
kwargs.pop('request_spec', None)
return original_resize_instance(
_self, context, instance, extra_instance_updates,
scheduler_hint, *args, **kwargs)
self.stub_out('nova.conductor.api.ComputeTaskAPI.resize_instance',
stub_resize_instance)
# Now cold migrate the server. Because of allow_resize_to_same_host and
# the weigher, the scheduler will pick host1 first. The FakeDriver
# actually allows migrating to the same host so we need to stub that
# out so the compute will raise UnableToMigrateToSelf like when using
# the libvirt driver.
host1_driver = self.computes['host1'].driver
with mock.patch.dict(host1_driver.capabilities,
supports_migrate_to_same_host=False):
self.api.post_server_action(server['id'], {'migrate': None})
# FIXME(mriedem): Due to bug 1830747 we don't go to VERIFY_RESIZE
# because the reschedule fails and the instance is put into
# ERROR status. When the bug is fixed the status should be
# VERIFY_RESIZE and the server should be on host2.
server = self._wait_for_state_change(
self.api, server, 'ERROR')
self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])
# And the RequestSpec.instance_group.uuid should be missing which
# leads to us failing to load the RequestSpec.
ex = self.assertRaises(exception.ObjectActionError,
objects.RequestSpec.get_by_instance_uuid,
ctxt, server['id'])
self.assertIn('unable to load uuid', six.text_type(ex))