Reproduce bug #1819460 in functional test

There are two calls during the conductor's ComputeTaskManager.build_instances,
used during re-schedule, that can raise exceptions which leave the instance
stuck in BUILD state instead of going to ERROR state.

This patch adds two functional test cases to reproduce the problems.
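
For context, the failure pattern the tests exercise, shown as a minimal,
self-contained sketch (illustrative only; none of the names below are Nova
code): if an exception escapes the re-schedule path before the error-handling
step that moves the instance to ERROR, the instance keeps its initial BUILD
state.

    # Illustrative only -- not Nova code. Shows how an unhandled exception
    # can leave an instance stuck in its initial state.
    class FakeInstance(object):
        def __init__(self):
            self.vm_state = 'BUILD'  # state while the server is being built

    class HandledError(Exception):
        pass

    class UnhandledError(Exception):
        pass

    def reschedule(instance, claim):
        try:
            claim()  # e.g. a resource claim against placement that may fail
        except HandledError:
            instance.vm_state = 'ERROR'  # the expected path on failure
        # If claim() raises anything else, the exception propagates before
        # vm_state is updated, so the instance stays in 'BUILD'.

    def failing_claim():
        raise UnhandledError('placement call failed')

    instance = FakeInstance()
    try:
        reschedule(instance, failing_claim)
    except UnhandledError:
        pass
    print(instance.vm_state)  # prints: BUILD -- the "stuck" state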

Conflicts:
       nova/tests/functional/test_servers.py

Change-Id: If80c4e4776b81cc06293989ee41d39b53735352b
Related-Bug: #1819460
(cherry picked from commit b63c42a0d4)
Author: Balazs Gibizer
Date: 2019-03-11 14:39:10 +01:00
Parent: 6755034e10
Commit: 07a1a8ff7d
2 changed files with 89 additions and 6 deletions


@@ -245,7 +245,8 @@ class _IntegratedTestBase(test.TestCase):
 class InstanceHelperMixin(object):
 
     def _wait_for_server_parameter(self, admin_api, server, expected_params,
-                                   max_retries=10):
+                                   max_retries=10,
+                                   fail_when_run_out_of_retries=True):
         retry_count = 0
         while True:
             server = admin_api.get_server(server['id'])
@@ -254,17 +255,22 @@ class InstanceHelperMixin(object):
                 break
             retry_count += 1
             if retry_count == max_retries:
-                self.fail('Wait for state change failed, '
-                          'expected_params=%s, server=%s'
-                          % (expected_params, server))
+                if fail_when_run_out_of_retries:
+                    self.fail('Wait for state change failed, '
+                              'expected_params=%s, server=%s'
+                              % (expected_params, server))
+                else:
+                    break
             time.sleep(0.5)
 
         return server
 
     def _wait_for_state_change(self, admin_api, server, expected_status,
-                               max_retries=10):
+                               max_retries=10,
+                               fail_when_run_out_of_retries=True):
         return self._wait_for_server_parameter(
-            admin_api, server, {'status': expected_status}, max_retries)
+            admin_api, server, {'status': expected_status}, max_retries,
+            fail_when_run_out_of_retries=fail_when_run_out_of_retries)
 
     def _build_minimal_create_server_request(self, api, name, image_uuid=None,
                                              flavor_id=None, networks=None,
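
The new fail_when_run_out_of_retries flag lets a test wait for a state change
that is expected not to happen (because of the bug being reproduced) without
failing the test. A standalone sketch of the behaviour, assuming a simplified
wait loop rather than the actual helper above:

    # Simplified stand-in for the helper above: when the retries run out and
    # fail_when_run_out_of_retries is False, return the last observed status
    # instead of failing, so the caller can assert on the (buggy) state.
    import time

    def wait_for_status(get_status, expected, max_retries=10,
                        fail_when_run_out_of_retries=True):
        retry_count = 0
        while True:
            status = get_status()
            if status == expected:
                return status
            retry_count += 1
            if retry_count == max_retries:
                if fail_when_run_out_of_retries:
                    raise AssertionError('status never reached %s' % expected)
                return status
            time.sleep(0.5)

    # A server stuck in BUILD never reaches ERROR, but the call still returns
    # so the test can assert the buggy BUILD state:
    assert wait_for_status(lambda: 'BUILD', 'ERROR', max_retries=2,
                           fail_when_run_out_of_retries=False) == 'BUILD'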


@@ -35,11 +35,13 @@ from nova.compute import api as compute_api
 from nova.compute import instance_actions
 from nova.compute import manager as compute_manager
 from nova.compute import rpcapi
+from nova.conductor import manager
 from nova import context
 from nova import exception
 from nova import objects
 from nova.objects import block_device as block_device_obj
 from nova import rc_fields
+from nova.scheduler import utils
 from nova.scheduler import weights
 from nova import test
 from nova.tests import fixtures as nova_fixtures
@@ -4035,6 +4037,37 @@ class ServerRescheduleTests(integrated_helpers.ProviderUsageBaseTestCase):
         # Ensure the allocation records on the destination host.
         self.assertFlavorMatchesUsage(dest_rp_uuid, self.flavor1)
 
+    def test_allocation_fails_during_reschedule(self):
+        """Verify that if nova fails to allocate resources during re-schedule
+        then the server is put into ERROR state properly.
+        """
+        server_req = self._build_minimal_create_server_request(
+            self.api, 'some-server', flavor_id=self.flavor1['id'],
+            image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
+            networks='none')
+
+        orig_claim = utils.claim_resources
+
+        # First call is during boot, we want that to succeed normally. Then the
+        # fake virt driver triggers a re-schedule. During that re-schedule we
+        # simulate that the placement call fails.
+        with mock.patch('nova.scheduler.utils.claim_resources',
+                        side_effect=[
+                            orig_claim,
+                            exception.AllocationUpdateFailed(
+                                consumer_uuid=uuids.inst1, error='testing')]):
+            server = self.api.post_server({'server': server_req})
+            # NOTE(gibi): Due to bug 1819460 the server is stuck in BUILD
+            # state instead of going to ERROR state
+            server = self._wait_for_state_change(
+                self.admin_api, server, 'ERROR',
+                fail_when_run_out_of_retries=False)
+            self.assertEqual('BUILD', server['status'])
+
+        self._delete_and_check_allocations(server)
+
 
 class ServerRescheduleTestsWithNestedResourcesRequest(ServerRescheduleTests):
     compute_driver = 'fake.FakeRescheduleDriverWithNestedCustomResources'
@@ -6754,3 +6787,47 @@ class PortResourceRequestReSchedulingTest(
         updated_port = self.neutron.show_port(port['id'])['port']
         binding_profile = updated_port['binding:profile']
         self.assertNotIn('allocation', binding_profile)
+
+    def test_boot_reschedule_fill_provider_mapping_raises(self):
+        """Verify that if the _fill_provider_mapping raises during re-schedule
+        then the instance is properly put into ERROR state.
+        """
+        port = self.neutron.port_with_resource_request
+
+        # First call is during boot, we want that to succeed normally. Then the
+        # fake virt driver triggers a re-schedule. During that re-schedule the
+        # fill is called again, and we simulate that call raises.
+        fill = manager.ComputeTaskManager._fill_provider_mapping
+        with mock.patch(
+                'nova.conductor.manager.ComputeTaskManager.'
+                '_fill_provider_mapping',
+                side_effect=[
+                    fill,
+                    exception.ConsumerAllocationRetrievalFailed(
+                        consumer_uuid=uuids.inst1, error='testing')],
+                autospec=True):
+            server = self._create_server(
+                flavor=self.flavor,
+                networks=[{'port': port['id']}])
+            # NOTE(gibi): Due to bug 1819460 the server is stuck in BUILD
+            # state
+            server = self._wait_for_state_change(
+                self.admin_api, server, 'ERROR',
+                fail_when_run_out_of_retries=False)
+            self.assertEqual('BUILD', server['status'])
+
+        # NOTE(gibi): Due to bug 1819460 the server is stuck in BUILD state
+        # and no error is presented to the user
+        # self.assertIn(
+        #     'Failed to retrieve allocations for consumer',
+        #     server['fault']['message'])
+        #
+        # NOTE(gibi): even after delete the allocation of such a server is
+        # leaked
+        # self._delete_and_check_allocations(server)
+        #
+        # # assert that unbind removes the allocation from the binding
+        # updated_port = self.neutron.show_port(port['id'])['port']
+        # binding_profile = updated_port['binding:profile']
+        # self.assertNotIn('allocation', binding_profile)
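
A side note on the mocking pattern used by both new tests: unittest.mock
treats an iterable passed as side_effect as a sequence of per-call behaviours,
where exception instances are raised and other items are returned, one per
call in order. A minimal demonstration (names are illustrative):

    # Minimal demonstration of the side_effect-as-list pattern: the first
    # call gets the first item, the second call gets the second item, and an
    # exception instance is raised rather than returned.
    from unittest import mock

    class FakeFailure(Exception):
        pass

    stub = mock.Mock(side_effect=['first result', FakeFailure('boom')])

    assert stub() == 'first result'  # first call returns the first item
    try:
        stub()                       # second call raises FakeFailure
    except FakeFailure as exc:
        print('second call raised: %s' % exc)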