Merge "Add functional regression test for bug 1837955" into stable/rocky
This commit is contained in:
commit
979ec661de
|
@ -0,0 +1,115 @@
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
from nova import exception
|
||||||
|
from nova.tests import fixtures as nova_fixtures
|
||||||
|
from nova.tests.functional import integrated_helpers
|
||||||
|
from nova.tests.unit import fake_notifier
|
||||||
|
from nova.tests.unit.image import fake as fake_image
|
||||||
|
|
||||||
|
|
||||||
|
class BuildRescheduleClaimFailsTestCase(
        integrated_helpers.ProviderUsageBaseTestCase):
    """Regression test for bug 1837955.

    Scenario: a server build fails on the primary host, and when resources
    are then allocated on the alternate host, that host is already full so
    the allocations claim in placement fails. The expected outcome is that
    the build fails with MaxRetriesExceeded and the server goes to ERROR
    status.
    """
    compute_driver = 'fake.SmallFakeDriver'

    def _wait_for_unversioned_notification(self, event_type):
        """Poll the fake notifier until a matching notification shows up.

        fake_notifier.NOTIFICATIONS is scanned up to 20 times with a half
        second sleep after every unsuccessful scan (roughly 10 seconds).

        :param event_type: the event_type value to look for
        :returns: the first recorded notification with that event_type
        :raises: the test failure exception (via self.fail) on timeout
        """
        scans_left = 20
        while scans_left > 0:
            found = next(
                (recorded for recorded in fake_notifier.NOTIFICATIONS
                 if recorded.event_type == event_type),
                None)
            if found is not None:
                return found
            time.sleep(.5)
            scans_left -= 1
        self.fail('Timed out waiting for unversioned notification %s. Got: %s'
                  % (event_type, fake_notifier.NOTIFICATIONS))

    def test_build_reschedule_alt_host_alloc_fails(self):
        # Two compute services are started so there is exactly one alternate
        # host. cpu_allocation_ratio=1.0 keeps the placement VCPU inventory
        # and allocations easy to reason about.
        self.flags(cpu_allocation_ratio=1.0)
        for index in range(2):
            self._start_compute('host%i' % index)

        def exhaust_alt_host_then_fail(
                _self, _context, _inst, nodename, *a, **kw):
            # Replacement for ResourceTracker.instance_claim: before the
            # reschedule to the other host is triggered, use up all of the
            # capacity on that other (alternate) host.
            if nodename == 'host1':
                other_host = 'host0'
            else:
                other_host = 'host1'
            provider = self._get_provider_uuid_by_host(other_host)
            # With cpu_allocation_ratio=1.0 the inventory total is the
            # total VCPU capacity of the host.
            vcpu_total = self._get_provider_inventory(
                provider)['VCPU']['total']
            # Pretend some unrelated consumer holds every VCPU on the
            # alternate host.
            consumer = '7d32d0bc-af16-44b2-8019-a24925d76152'
            body = {
                'allocations': {
                    provider: {'resources': {'VCPU': vcpu_total}},
                },
                'project_id': self.api.project_id,
                # The project id value is used for the user as well.
                'user_id': self.api.project_id,
            }
            url = '/allocations/%s' % consumer
            resp = self.placement_api.put(url, body, version='1.12')
            self.assertEqual(204, resp.status, resp.content)
            # Failing the claim is what triggers the reschedule.
            raise exception.ComputeResourcesUnavailable(reason='overhead!')

        # Whichever host the scheduler picks as the primary, its instance
        # claim is replaced with the failing stub above.
        self.stub_out('nova.compute.manager.resource_tracker.ResourceTracker.'
                      'instance_claim', exhaust_alt_host_then_fail)

        # With the stub installed, create a server; it is expected to end up
        # in ERROR status.
        request = self._build_minimal_create_server_request(
            self.api, 'test_build_reschedule_alt_host_alloc_fails',
            image_uuid=fake_image.get_valid_image_id(),
            networks=[{'port': nova_fixtures.NeutronFixture.port_1['id']}])
        server = self.api.post_server({'server': request})
        # FIXME(mriedem): This is bug 1837955 where the status is stuck in
        # BUILD rather than the vm_state being set to error and the task_state
        # being set to None. Uncomment this when the bug is fixed.
        # server = self._wait_for_state_change(self.api, server, 'ERROR')

        # Wait for the MaxRetriesExceeded fault to be recorded.
        # set_vm_state_and_notify sets the vm_state to ERROR before the fault
        # is recorded but after the notification is sent. So wait for the
        # unversioned notification to show up and then get the fault.
        # FIXME(mriedem): Uncomment this when bug 1837955 is fixed.
        # self._wait_for_unversioned_notification(
        #     'compute_task.build_instances')
        # server = self.api.get_server(server['id'])
        # self.assertIn('fault', server)
        # self.assertIn('Exceeded maximum number of retries',
        #               server['fault']['message'])

        # TODO(mriedem): Remove this when the bug is fixed. Until then
        # something still has to be asserted to demonstrate the failure, so
        # the captured logs are polled (20 tries, half a second apart).
        for _attempt in range(20):
            if 'MaxRetriesExceeded' in self.stdlog.logger.output:
                break
            time.sleep(.5)
        else:
            self.fail('Timed out waiting for MaxRetriesExceeded to show up '
                      'in the logs.')
|
Loading…
Reference in New Issue