Add functional recreate test for bug 1764556
This attempts to recreate the following scenario: 1) boot instance on ocata host where the compute service does not have a uuid 2) migrate instance 3) delete the ocata service (thus deleting the compute node) 4) start compute service with the same name 5) migrate instance to newly-created compute node 6) upgrade to pike where services.uuid data migration happens 7) list instances as admin to join on the services table. The failure occurs when listing instances because the deleted service with the same name as the compute host that the instance is running on gets pulled from the DB and the Service object attempts to set a uuid on it, which fails since it's not using a read_deleted="yes" context. While working on this, the service_get_all_by_binary DB API method had to be fixed to not hard-code read_deleted="no" since the test needs to be able to read deleted services, which it can control via its own context object (note that RequestContext.read_deleted already defaults to "no" so the hard-coding in the DB API is unnecessarily restrictive). Change-Id: I4d60da26fcf0a77628d1fdf4e818884614fa4f02 Related-Bug: #1764556 (cherry picked from commit 81f05f53d3) (cherry picked from commit 44709cfb5c) (cherry picked from commit eadd78efe3)
This commit is contained in:
parent
95a35cbee4
commit
de035bbfcc
|
@ -485,8 +485,7 @@ def service_get_by_host_and_topic(context, host, topic):
|
|||
|
||||
@pick_context_manager_reader
def service_get_all_by_binary(context, binary, include_disabled=False):
    """Return all services for the given binary.

    :param context: request context; note that whether soft-deleted rows
        are returned is governed by the context's read_deleted setting
        (no read_deleted is hard-coded here, so callers that need deleted
        services can use a read_deleted="yes" context)
    :param binary: the service binary name, e.g. 'nova-compute'
    :param include_disabled: if False (the default), disabled services
        are filtered out of the result
    :returns: list of matching Service model rows
    """
    criteria = {'binary': binary}
    if not include_disabled:
        criteria['disabled'] = False
    return model_query(context, models.Service).filter_by(**criteria).all()
|
||||
|
|
|
@ -0,0 +1,162 @@
|
|||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import six
|
||||
|
||||
from nova import context as nova_context
|
||||
from nova.db import api as db
|
||||
from nova import test
|
||||
from nova.tests import fixtures as nova_fixtures
|
||||
from nova.tests.functional.api import client as api_client
|
||||
from nova.tests.functional import integrated_helpers
|
||||
from nova.tests.unit.image import fake as fake_image
|
||||
from nova.tests.unit import policy_fixture
|
||||
from nova import utils
|
||||
from nova.virt import fake as fake_virt
|
||||
|
||||
|
||||
class InstanceListWithDeletedServicesTestCase(
        test.TestCase, integrated_helpers.InstanceHelperMixin):
    """Contains regression testing for bug 1764556 which is similar to bug
    1746509 but different in that it deals with a deleted service and compute
    node which was not upgraded to the point of having a UUID and that causes
    problems later when an instance which was running on that node is migrated
    back to an upgraded service with the same hostname. This is because the
    service uuid migration routine gets a ServiceNotFound error when loading
    up a deleted service by hostname.
    """
    def setUp(self):
        super(InstanceListWithDeletedServicesTestCase, self).setUp()
        self.useFixture(policy_fixture.RealPolicyFixture())

        # The NeutronFixture is needed to stub out validate_networks in API.
        self.useFixture(nova_fixtures.NeutronFixture(self))

        # We need the computes reporting into placement for the filter
        # scheduler to pick a host.
        self.useFixture(nova_fixtures.PlacementFixture())

        api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
            api_version='v2.1'))
        self.api = api_fixture.api
        self.admin_api = api_fixture.admin_api
        self.admin_api.microversion = 'latest'

        # the image fake backend needed for image discovery
        fake_image.stub_out_image_service(self)
        self.addCleanup(fake_image.FakeImageService_reset)
        # Get the image before we set the microversion to latest to avoid
        # the proxy issues with GET /images in 2.36.
        self.image_id = self.api.get_images()[0]['id']
        self.api.microversion = 'latest'

        self.start_service('conductor')
        self.start_service('scheduler')

    def _migrate_server(self, server, target_host):
        """Cold-migrate the server, assert it lands on target_host, and
        confirm the resize.

        :param server: dict representation of the server to migrate
        :param target_host: hostname the server is expected to land on
            after the migration (checked via OS-EXT-SRV-ATTR:host)
        :returns: the updated server dict, back in ACTIVE state
        """
        self.admin_api.api_post('/servers/%s/action' % server['id'],
                                {'migrate': None})
        server = self._wait_for_state_change(
            self.admin_api, server, 'VERIFY_RESIZE')
        self.assertEqual(target_host, server['OS-EXT-SRV-ATTR:host'])
        self.admin_api.api_post('/servers/%s/action' % server['id'],
                                {'confirmResize': None},
                                check_response_status=[204])
        server = self._wait_for_state_change(self.api, server, 'ACTIVE')
        return server

    def test_instance_list_deleted_service_with_no_uuid(self):
        """This test covers the following scenario:

        1. create an instance on a host which we'll simulate to be old
           by not having a uuid set
        2. migrate the instance to the "newer" host that has a service uuid
        3. delete the old service/compute node
        4. start a new service with the old hostname (still host1); this will
           also create a new compute_nodes table record for that host/node
        5. migrate the instance back to the host1 service
        6. list instances which will try to online migrate the old service
           uuid
        """
        fake_virt.set_nodes(['host1'])
        self.addCleanup(fake_virt.restore_nodes)
        host1 = self.start_service('compute', host='host1')

        # Create an instance which will be on host1 since it's the only host.
        server_req = self._build_minimal_create_server_request(
            self.api, 'test_instance_list_deleted_service_with_no_uuid',
            image_uuid=self.image_id, networks='none')
        server = self.api.post_server({'server': server_req})
        self._wait_for_state_change(self.api, server, 'ACTIVE')

        # Now we start a 2nd compute which is "upgraded" (has a uuid) and
        # we'll migrate the instance to that host.
        fake_virt.set_nodes(['host2'])
        self.addCleanup(fake_virt.restore_nodes)
        host2 = self.start_service('compute', host='host2')
        self.assertIsNotNone(host2.service_ref.uuid)

        server = self._migrate_server(server, 'host2')

        # Delete the host1 service (which implicitly deletes the host1 compute
        # node record).
        host1.stop()
        self.admin_api.api_delete(
            '/os-services/%s' % host1.service_ref.uuid)
        # We should now only have 1 compute service (host2).
        compute_services = self.admin_api.api_get(
            '/os-services?binary=nova-compute').body['services']
        self.assertEqual(1, len(compute_services))
        # Make sure the compute node is also gone.
        self.admin_api.api_get(
            '/os-hypervisors?hypervisor_hostname_pattern=host1',
            check_response_status=[404])

        # Now recreate the host1 service and compute node by restarting the
        # service.
        self.restart_compute_service(host1)
        # At this point, host1's service should have a uuid.
        self.assertIsNotNone(host1.service_ref.uuid)

        # Sanity check that there are 3 services in the database, but only 1
        # is deleted.
        ctxt = nova_context.get_admin_context()
        with utils.temporary_mutation(ctxt, read_deleted='yes'):
            services = db.service_get_all_by_binary(ctxt, 'nova-compute')
            self.assertEqual(3, len(services))
            deleted_services = [svc for svc in services if svc['deleted']]
            self.assertEqual(1, len(deleted_services))
            deleted_service = deleted_services[0]
            self.assertEqual('host1', deleted_service['host'])

        # Now migrate the instance back to host1.
        self._migrate_server(server, 'host1')

        # Now null out the service uuid to simulate that the deleted host1
        # is old. We have to do this through the DB API directly since the
        # Service object won't allow a null uuid field. We also have to do
        # this *after* deleting the service via the REST API and migrating the
        # server because otherwise that will set a uuid when looking up the
        # service.
        with utils.temporary_mutation(ctxt, read_deleted='yes'):
            service_ref = db.service_update(
                ctxt, deleted_service['id'], {'uuid': None})
            self.assertIsNone(service_ref['uuid'])

        # Finally, list servers as an admin so it joins on services to get host
        # information.
        # FIXME(mriedem): This is bug 1764556 where the join on the services
        # table also pulls the deleted service that doesn't have a uuid and
        # attempts to migrate that service to have a uuid, which fails because
        # it's not using a read_deleted='yes' context.
        ex = self.assertRaises(api_client.OpenStackApiException,
                               self.admin_api.get_servers, detail=True)
        self.assertIn('ServiceNotFound', six.text_type(ex))
|
Loading…
Reference in New Issue