From e558aa2046b649696b69551657e5d429e8117427 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer
Date: Thu, 24 Sep 2020 15:04:21 +0200
Subject: [PATCH] Reproduce bug 1896463 in func env

There is a race condition between the rebuild and the
_update_available_resource periodic task on the compute. This patch
adds a reproducer functional test. Unfortunately it needs some injected
sleep to make the race happen in a stable way. This is suboptimal but
only adds 3 seconds of slowness to the test execution.

Conflicts:
    nova/tests/functional/regressions/test_bug_1896463.py
    due to I84c58de90dad6d86271767363aef90ddac0f1730 and
    I8c96b337f32148f8f5899c9b87af331b1fa41424 are not in stable/train.
    Also I had to make the test code Python 2 compatible.

Change-Id: Id0577bceed9808b52da4acc352cf9c204f6c8861
Related-Bug: #1896463
(cherry picked from commit 3f348602ae4a40c52c7135b2cb48deaa6052c488)
(cherry picked from commit d768cdbb88d0b0b3ca38623c4bb26d5eabdf1596)
(cherry picked from commit 02114a9d7f2e8b62d3a7091ca3fde251dfffa860)
---
 .../regressions/test_bug_1896463.py | 243 ++++++++++++++++++
 1 file changed, 243 insertions(+)
 create mode 100644 nova/tests/functional/regressions/test_bug_1896463.py

diff --git a/nova/tests/functional/regressions/test_bug_1896463.py b/nova/tests/functional/regressions/test_bug_1896463.py
new file mode 100644
index 000000000000..af381b69fcf5
--- /dev/null
+++ b/nova/tests/functional/regressions/test_bug_1896463.py
@@ -0,0 +1,243 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import fixtures
+import time
+
+from oslo_config import cfg
+
+from nova import context
+from nova import objects
+from nova import test
+from nova.tests import fixtures as nova_fixtures
+from nova.tests.functional import fixtures as func_fixtures
+from nova.tests.functional import integrated_helpers
+from nova.tests.unit.image import fake as fake_image
+from nova import utils
+from nova.virt import fake
+
+
+CONF = cfg.CONF
+
+
+class TestEvacuateResourceTrackerRace(
+    test.TestCase, integrated_helpers.InstanceHelperMixin,
+):
+    """Demonstrate bug #1896463.
+
+    Trigger a race condition between an almost finished evacuation that is
+    dropping the migration context, and the _update_available_resource()
+    periodic task that already loaded the instance list but hasn't loaded
+    the migration list yet. The result is that the PCI allocation made by
+    the evacuation is deleted by the overlapping periodic task run and the
+    instance will not have a PCI allocation after the evacuation.
+ """ + + def setUp(self): + super(TestEvacuateResourceTrackerRace, self).setUp() + self.neutron = self.useFixture(nova_fixtures.NeutronFixture(self)) + fake_image.stub_out_image_service(self) + self.addCleanup(fake_image.FakeImageService_reset) + self.placement = self.useFixture(func_fixtures.PlacementFixture()).api + + self.api_fixture = self.useFixture(nova_fixtures.OSAPIFixture( + api_version='v2.1')) + + self.admin_api = self.api_fixture.admin_api + self.admin_api = self.api_fixture.admin_api + self.admin_api.microversion = 'latest' + self.api = self.admin_api + + self.start_service('conductor') + self.start_service('scheduler') + + self.flags(compute_driver='fake.FakeDriverWithPciResources') + self.useFixture( + fake.FakeDriverWithPciResources. + FakeDriverWithPciResourcesConfigFixture()) + + self.compute1 = self.start_service('compute', host='host1') + + self.compute1_id = self._get_compute_node_id_by_host('host1') + self.compute1_service_id = self.admin_api.get_services( + host='host1', binary='nova-compute')[0]['id'] + + self.compute2 = self.start_service('compute', host='host2') + self.compute2_id = self._get_compute_node_id_by_host('host2') + self.compute2_service_id = self.admin_api.get_services( + host='host2', binary='nova-compute')[0]['id'] + + # add extra ports and the related network to the neutron fixture + # specifically for these tests. It cannot be added globally in the + # fixture init as it adds a second network that makes auto allocation + # based test to fail due to ambiguous networks. + self.neutron._ports[self.neutron.sriov_port['id']] = \ + copy.deepcopy(self.neutron.sriov_port) + self.neutron._networks[ + self.neutron.network_2['id']] = self.neutron.network_2 + self.neutron._subnets[ + self.neutron.subnet_2['id']] = self.neutron.subnet_2 + + self.ctxt = context.get_admin_context() + + def _get_compute_node_id_by_host(self, host): + # we specifically need the integer id of the node not the UUID so we + # need to use the old microversion + with utils.temporary_mutation(self.admin_api, microversion='2.52'): + hypers = self.admin_api.api_get( + 'os-hypervisors').body['hypervisors'] + for hyper in hypers: + if hyper['hypervisor_hostname'] == host: + return hyper['id'] + + self.fail('Hypervisor with hostname=%s not found' % host) + + def _assert_pci_device_allocated( + self, instance_uuid, compute_node_id, num=1): + """Assert that a given number of PCI devices are allocated to the + instance on the given host. 
+ """ + + devices = objects.PciDeviceList.get_by_instance_uuid( + self.ctxt, instance_uuid) + devices_on_host = [dev for dev in devices + if dev.compute_node_id == compute_node_id] + self.assertEqual(num, len(devices_on_host)) + + def test_evacuate_races_with_update_available_resource(self): + # Create a server with a direct port to have PCI allocation + server_req = self._build_minimal_create_server_request( + self.api, 'test-server-for-bug-1896463', + image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', + networks=[{'port': self.neutron.sriov_port['id']}]) + server_req['availability_zone'] = 'nova:host1' + created_server = self.api.post_server({'server': server_req}) + server = self._wait_for_state_change( + self.admin_api, created_server, 'ACTIVE') + + self._assert_pci_device_allocated(server['id'], self.compute1_id) + self._assert_pci_device_allocated( + server['id'], self.compute2_id, num=0) + + # stop and force down the compute the instance is on to allow + # evacuation + self.compute1.stop() + self.admin_api.put_service( + self.compute1_service_id, {'forced_down': 'true'}) + + # Inject some sleeps both in the Instance.drop_migration_context and + # the MigrationList.get_in_progress_by_host_and_node code to make them + # overlap. + # We want to create the following execution scenario: + # 1) The evacuation makes a move claim on the dest including the PCI + # claim. This means there is a migration context. But the evacuation + # is not complete yet so the instance.host does not point to the + # dest host. + # 2) The dest resource tracker starts an _update_available_resource() + # periodic task and this task loads the list of instances on its + # host from the DB. Our instance is not in this list due to #1. + # 3) The evacuation finishes, the instance.host is set to the dest host + # and the migration context is deleted. + # 4) The periodic task now loads the list of in-progress migration from + # the DB to check for incoming our outgoing migrations. However due + # to #3 our instance is not in this list either. + # 5) The periodic task cleans up every lingering PCI claim that is not + # connected to any instance collected above from the instance list + # and from the migration list. As our instance is not in either of + # the lists, the resource tracker cleans up the PCI allocation for + # the already finished evacuation of our instance. + # + # Unfortunately we cannot reproduce the above situation without sleeps. + # We need that the evac starts first then the periodic starts, but not + # finishes, then evac finishes, then periodic finishes. If I trigger + # and run the whole periodic in a wrapper of drop_migration_context + # then I could not reproduce the situation described at #4). In general + # it is not + # + # evac + # | + # | + # | periodic + # | | + # | | + # | x + # | + # | + # x + # + # but + # + # evac + # | + # | + # | periodic + # | | + # | | + # | | + # x | + # | + # x + # + # what is needed need. + # + # Starting the periodic from the test in a separate thread at + # drop_migration_context() might work but that is an extra complexity + # in the test code. Also it might need a sleep still to make the + # reproduction stable but only one sleep instead of two. 
+        orig_drop = objects.Instance.drop_migration_context
+
+        def slow_drop(*args, **kwargs):
+            time.sleep(1)
+            return orig_drop(*args, **kwargs)
+
+        self.useFixture(
+            fixtures.MockPatch(
+                'nova.objects.instance.Instance.drop_migration_context',
+                new=slow_drop))
+
+        orig_get_mig = objects.MigrationList.get_in_progress_by_host_and_node
+
+        def slow_get_mig(*args, **kwargs):
+            time.sleep(2)
+            return orig_get_mig(*args, **kwargs)
+
+        self.useFixture(
+            fixtures.MockPatch(
+                'nova.objects.migration.MigrationList.'
+                'get_in_progress_by_host_and_node',
+                side_effect=slow_get_mig))
+
+        self.admin_api.post_server_action(server['id'], {'evacuate': {}})
+        # we trigger the _update_available_resource periodic to overlap with
+        # the already started evacuation
+        ctx = context.get_admin_context()
+        self.compute1.manager.update_available_resource(ctx)
+        self.compute2.manager.update_available_resource(ctx)
+
+        self._wait_for_server_parameter(
+            self.admin_api,
+            server,
+            {'OS-EXT-SRV-ATTR:host': 'host2', 'status': 'ACTIVE'}
+        )
+
+        self._assert_pci_device_allocated(server['id'], self.compute1_id)
+
+        # This is bug #1896463 as the PCI allocation was deleted by the racing
+        # _update_available_resource periodic task.
+        self._assert_pci_device_allocated(
+            server['id'], self.compute2_id, num=0)
+
+        # FIXME(gibi): When this bug is fixed (or if you remove the sleeps
+        # above to avoid the race condition) then we expect that the PCI
+        # allocation exists on the destination host too.
+        # self._assert_pci_device_allocated(server['id'], self.compute2_id)