Merge "Add functional regression test for bug 1837955" into stable/rocky

2019-08-08 16:19:54 +00:00 · 2019-08-08 16:19:54 +00:00 · 979ec661de
parent 7efda0632d f292a92a89
commit 979ec661de
1 changed files with 115 additions and 0 deletions
--- a/nova/tests/functional/regressions/test_bug_1837955.py
+++ b/nova/tests/functional/regressions/test_bug_1837955.py
@@ -0,0 +1,115 @@
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
 # a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
 import time
 from nova import exception
 from nova.tests import fixtures as nova_fixtures
 from nova.tests.functional import integrated_helpers
 from nova.tests.unit import fake_notifier
 from nova.tests.unit.image import fake as fake_image
 class BuildRescheduleClaimFailsTestCase(
         integrated_helpers.ProviderUsageBaseTestCase):
     """Regression test for bug 1837955.

     Scenario: a server build fails on the primary host, and when resources
     are then allocated on the alternate host, that host is already full.
     The allocations claim in placement therefore fails, the build ends with
     MaxRetriesExceeded and the server is expected to go to ERROR status.
     """

     compute_driver = 'fake.SmallFakeDriver'

     def _wait_for_unversioned_notification(self, event_type):
         """Poll the fake notifier for a notification of the given type.

         :param event_type: event_type of the unversioned notification to
             wait for
         :returns: the first recorded notification whose event_type matches
         """
         attempts_left = 20  # 20 polls, 0.5 seconds apart: up to 10 seconds
         while attempts_left > 0:
             match = next(
                 (recorded for recorded in fake_notifier.NOTIFICATIONS
                  if recorded.event_type == event_type),
                 None)
             if match is not None:
                 return match
             time.sleep(.5)
             attempts_left -= 1
         # Nothing matched within the polling window; fail the test and
         # include whatever notifications were recorded.
         self.fail('Timed out waiting for unversioned notification %s. Got: %s'
                   % (event_type, fake_notifier.NOTIFICATIONS))

     def test_build_reschedule_alt_host_alloc_fails(self):
         """Fail the claim on the primary host after filling the alternate.

         Until bug 1837955 is fixed this only asserts that MaxRetriesExceeded
         shows up in the captured logs.
         """
         # A ratio of 1.0 keeps the VCPU inventory and allocation math in
         # placement simple.
         self.flags(cpu_allocation_ratio=1.0)
         # Two compute services: one primary host and one alternate.
         for host_index in range(2):
             self._start_compute('host%i' % host_index)

         def fake_instance_claim(_self, _context, _inst, nodename, *a, **kw):
             """Consume all VCPU on the other host, then fail this claim."""
             # Whichever host is doing the claim, the other one is the
             # alternate that the reschedule would land on.
             if nodename == 'host1':
                 other_host = 'host0'
             else:
                 other_host = 'host1'
             provider_uuid = self._get_provider_uuid_by_host(other_host)
             provider_inventory = self._get_provider_inventory(provider_uuid)
             # With cpu_allocation_ratio=1.0 the inventory total is the whole
             # VCPU capacity of the host, so allocating it all to some other
             # consumer leaves no room on the alternate.
             vcpu_capacity = provider_inventory['VCPU']['total']
             alt_consumer = '7d32d0bc-af16-44b2-8019-a24925d76152'
             vcpu_request = {'VCPU': vcpu_capacity}
             payload = {
                 'allocations': {provider_uuid: {'resources': vcpu_request}},
                 'project_id': self.api.project_id,
                 'user_id': self.api.project_id
             }
             allocations_url = '/allocations/%s' % alt_consumer
             resp = self.placement_api.put(
                 allocations_url, payload, version='1.12')
             self.assertEqual(204, resp.status, resp.content)
             # Failing the claim here is what triggers the reschedule.
             raise exception.ComputeResourcesUnavailable(reason='overhead!')

         # The stub applies to the instance claim no matter which host the
         # scheduler selects as the primary.
         self.stub_out('nova.compute.manager.resource_tracker.ResourceTracker.'
                       'instance_claim', fake_instance_claim)

         # With the stub installed, create a server; it should eventually go
         # to ERROR status.
         image_id = fake_image.get_valid_image_id()
         requested_networks = [
             {'port': nova_fixtures.NeutronFixture.port_1['id']}]
         server_req = self._build_minimal_create_server_request(
             self.api, 'test_build_reschedule_alt_host_alloc_fails',
             image_uuid=image_id, networks=requested_networks)
         server = self.api.post_server({'server': server_req})
         # FIXME(mriedem): This is bug 1837955 where the status is stuck in
         # BUILD rather than the vm_state being set to error and the task_state
         # being set to None. Uncomment this when the bug is fixed.
         # server = self._wait_for_state_change(self.api, server, 'ERROR')

         # Wait for the MaxRetriesExceeded fault to be recorded.
         # set_vm_state_and_notify sets the vm_state to ERROR before the fault
         # is recorded but after the notification is sent. So wait for the
         # unversioned notification to show up and then get the fault.
         # FIXME(mriedem): Uncomment this when bug 1837955 is fixed.
         # self._wait_for_unversioned_notification(
         #     'compute_task.build_instances')
         # server = self.api.get_server(server['id'])
         # self.assertIn('fault', server)
         # self.assertIn('Exceeded maximum number of retries',
         #               server['fault']['message'])

         # TODO(mriedem): Remove this when the bug is fixed. Something has to
         # be asserted before the fix lands to demonstrate the failure, so
         # poll the captured logs instead.
         seen_in_logs = False
         for _ in range(20):
             if 'MaxRetriesExceeded' in self.stdlog.logger.output:
                 seen_in_logs = True
                 break
             time.sleep(.5)
         if not seen_in_logs:
             self.fail('Timed out waiting for MaxRetriesExceeded to show up '
                       'in the logs.')