From 07a1a8ff7dcb00283ba7ebb6f59a70002a4ee4db Mon Sep 17 00:00:00 2001
From: Balazs Gibizer
Date: Mon, 11 Mar 2019 14:39:10 +0100
Subject: [PATCH] Reproduce bug #1819460 in functional test

There are two calls during ComputeTaskManager.build_instances, used
during re-schedule, that could potentially raise exceptions, leaving
the instance stuck in BUILD state instead of going to ERROR state.
This patch adds two functional test cases to reproduce the problems.

Conflicts:
	nova/tests/functional/test_servers.py

Change-Id: If80c4e4776b81cc06293989ee41d39b53735352b
Related-Bug: #1819460
(cherry picked from commit b63c42a0d4836fd0364cb306145d3474619f1e19)
---
 nova/tests/functional/integrated_helpers.py | 18 +++--
 nova/tests/functional/test_servers.py       | 77 +++++++++++++++++++++
 2 files changed, 89 insertions(+), 6 deletions(-)

diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py
index a5f1d65e7548..fe6d8fd42598 100644
--- a/nova/tests/functional/integrated_helpers.py
+++ b/nova/tests/functional/integrated_helpers.py
@@ -245,7 +245,8 @@ class _IntegratedTestBase(test.TestCase):
 
 class InstanceHelperMixin(object):
     def _wait_for_server_parameter(self, admin_api, server, expected_params,
-                                   max_retries=10):
+                                   max_retries=10,
+                                   fail_when_run_out_of_retries=True):
         retry_count = 0
         while True:
             server = admin_api.get_server(server['id'])
@@ -254,17 +255,22 @@ class InstanceHelperMixin(object):
                 break
             retry_count += 1
             if retry_count == max_retries:
-                self.fail('Wait for state change failed, '
-                          'expected_params=%s, server=%s'
-                          % (expected_params, server))
+                if fail_when_run_out_of_retries:
+                    self.fail('Wait for state change failed, '
+                              'expected_params=%s, server=%s'
+                              % (expected_params, server))
+                else:
+                    break
             time.sleep(0.5)
 
         return server
 
     def _wait_for_state_change(self, admin_api, server, expected_status,
-                               max_retries=10):
+                               max_retries=10,
+                               fail_when_run_out_of_retries=True):
         return self._wait_for_server_parameter(
-            admin_api, server, {'status': expected_status}, max_retries)
+            admin_api, server, {'status': expected_status}, max_retries,
+            fail_when_run_out_of_retries=fail_when_run_out_of_retries)
 
     def _build_minimal_create_server_request(self, api, name, image_uuid=None,
                                              flavor_id=None, networks=None,
diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py
index 6371bf21c306..067a3b86ee30 100644
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@@ -35,11 +35,13 @@ from nova.compute import api as compute_api
 from nova.compute import instance_actions
 from nova.compute import manager as compute_manager
 from nova.compute import rpcapi
+from nova.conductor import manager
 from nova import context
 from nova import exception
 from nova import objects
 from nova.objects import block_device as block_device_obj
 from nova import rc_fields
+from nova.scheduler import utils
 from nova.scheduler import weights
 from nova import test
 from nova.tests import fixtures as nova_fixtures
@@ -4035,6 +4037,37 @@ class ServerRescheduleTests(integrated_helpers.ProviderUsageBaseTestCase):
         # Ensure the allocation records on the destination host.
         self.assertFlavorMatchesUsage(dest_rp_uuid, self.flavor1)
 
+    def test_allocation_fails_during_reschedule(self):
+        """Verify that if nova fails to allocate resources during re-schedule
+        then the server is put into ERROR state properly.
+ """ + + server_req = self._build_minimal_create_server_request( + self.api, 'some-server', flavor_id=self.flavor1['id'], + image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', + networks='none') + + orig_claim = utils.claim_resources + # First call is during boot, we want that to succeed normally. Then the + # fake virt driver triggers a re-schedule. During that re-schedule we + # simulate that the placement call fails. + with mock.patch('nova.scheduler.utils.claim_resources', + side_effect=[ + orig_claim, + exception.AllocationUpdateFailed( + consumer_uuid=uuids.inst1, error='testing')]): + + server = self.api.post_server({'server': server_req}) + # NOTE(gibi): Due to bug 1819460 the server stuck in BUILD state + # instead of going to ERROR state + server = self._wait_for_state_change( + self.admin_api, server, 'ERROR', + fail_when_run_out_of_retries=False) + + self.assertEqual('BUILD', server['status']) + + self._delete_and_check_allocations(server) + class ServerRescheduleTestsWithNestedResourcesRequest(ServerRescheduleTests): compute_driver = 'fake.FakeRescheduleDriverWithNestedCustomResources' @@ -6754,3 +6787,47 @@ class PortResourceRequestReSchedulingTest( updated_port = self.neutron.show_port(port['id'])['port'] binding_profile = updated_port['binding:profile'] self.assertNotIn('allocation', binding_profile) + + def test_boot_reschedule_fill_provider_mapping_raises(self): + """Verify that if the _fill_provider_mapping raises during re-schedule + then the instance is properly put into ERROR state. + """ + + port = self.neutron.port_with_resource_request + + # First call is during boot, we want that to succeed normally. Then the + # fake virt driver triggers a re-schedule. During that re-schedule the + # fill is called again, and we simulate that call raises. + fill = manager.ComputeTaskManager._fill_provider_mapping + + with mock.patch( + 'nova.conductor.manager.ComputeTaskManager.' + '_fill_provider_mapping', + side_effect=[ + fill, + exception.ConsumerAllocationRetrievalFailed( + consumer_uuid=uuids.inst1, error='testing')], + autospec=True): + server = self._create_server( + flavor=self.flavor, + networks=[{'port': port['id']}]) + # NOTE(gibi): Due to bug 1819460 the server stuck in BUILD state + server = self._wait_for_state_change( + self.admin_api, server, 'ERROR', + fail_when_run_out_of_retries=False) + + self.assertEqual('BUILD', server['status']) + + # NOTE(gibi): Due to bug 1819460 the server stuck in BUILD state and no + # error is presented to the user + # self.assertIn( + # 'Failed to retrieve allocations for consumer', + # server['fault']['message']) + # + # NOTE(gibi): even after delete the allocation of such server is leaked + # self._delete_and_check_allocations(server) + # + # # assert that unbind removes the allocation from the binding + # updated_port = self.neutron.show_port(port['id'])['port'] + # binding_profile = updated_port['binding:profile'] + # self.assertNotIn('allocation', binding_profile)