From e558aa2046b649696b69551657e5d429e8117427 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer
Date: Thu, 24 Sep 2020 15:04:21 +0200
Subject: [PATCH] Reproduce bug 1896463 in func env

There is a race condition between the rebuild and the
_update_available_resource periodic task on the compute. This patch
adds a reproducer functional test. Unfortunately it needs some injected
sleep to make the race happen in a stable way. This is suboptimal but
only adds 3 seconds of slowness to the test execution.

Conflicts:
    nova/tests/functional/regressions/test_bug_1896463.py
    due to I84c58de90dad6d86271767363aef90ddac0f1730 and
    I8c96b337f32148f8f5899c9b87af331b1fa41424 are not in stable/train.
    Also I had to make the test code Python 2 compatible.

Change-Id: Id0577bceed9808b52da4acc352cf9c204f6c8861
Related-Bug: #1896463
(cherry picked from commit 3f348602ae4a40c52c7135b2cb48deaa6052c488)
(cherry picked from commit d768cdbb88d0b0b3ca38623c4bb26d5eabdf1596)
(cherry picked from commit 02114a9d7f2e8b62d3a7091ca3fde251dfffa860)
---
 .../regressions/test_bug_1896463.py | 243 ++++++++++++++++++
 1 file changed, 243 insertions(+)
 create mode 100644 nova/tests/functional/regressions/test_bug_1896463.py

diff --git a/nova/tests/functional/regressions/test_bug_1896463.py b/nova/tests/functional/regressions/test_bug_1896463.py
new file mode 100644
index 000000000000..af381b69fcf5
--- /dev/null
+++ b/nova/tests/functional/regressions/test_bug_1896463.py
@@ -0,0 +1,243 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import fixtures
+import time
+
+from oslo_config import cfg
+
+from nova import context
+from nova import objects
+from nova import test
+from nova.tests import fixtures as nova_fixtures
+from nova.tests.functional import fixtures as func_fixtures
+from nova.tests.functional import integrated_helpers
+from nova.tests.unit.image import fake as fake_image
+from nova import utils
+from nova.virt import fake
+
+
+CONF = cfg.CONF
+
+
+class TestEvacuateResourceTrackerRace(
+    test.TestCase, integrated_helpers.InstanceHelperMixin,
+):
+    """Demonstrate bug #1896463.
+
+    Trigger a race condition between an almost finished evacuation that is
+    dropping the migration context, and the _update_available_resource()
+    periodic task that already loaded the instance list but hasn't loaded
+    the migration list yet. The result is that the PCI allocation made by
+    the evacuation is deleted by the overlapping periodic task run and the
+    instance will not have a PCI allocation after the evacuation.
+ """ + + def setUp(self): + super(TestEvacuateResourceTrackerRace, self).setUp() + self.neutron = self.useFixture(nova_fixtures.NeutronFixture(self)) + fake_image.stub_out_image_service(self) + self.addCleanup(fake_image.FakeImageService_reset) + self.placement = self.useFixture(func_fixtures.PlacementFixture()).api + + self.api_fixture = self.useFixture(nova_fixtures.OSAPIFixture( + api_version='v2.1')) + + self.admin_api = self.api_fixture.admin_api + self.admin_api = self.api_fixture.admin_api + self.admin_api.microversion = 'latest' + self.api = self.admin_api + + self.start_service('conductor') + self.start_service('scheduler') + + self.flags(compute_driver='fake.FakeDriverWithPciResources') + self.useFixture( + fake.FakeDriverWithPciResources. + FakeDriverWithPciResourcesConfigFixture()) + + self.compute1 = self.start_service('compute', host='host1') + + self.compute1_id = self._get_compute_node_id_by_host('host1') + self.compute1_service_id = self.admin_api.get_services( + host='host1', binary='nova-compute')[0]['id'] + + self.compute2 = self.start_service('compute', host='host2') + self.compute2_id = self._get_compute_node_id_by_host('host2') + self.compute2_service_id = self.admin_api.get_services( + host='host2', binary='nova-compute')[0]['id'] + + # add extra ports and the related network to the neutron fixture + # specifically for these tests. It cannot be added globally in the + # fixture init as it adds a second network that makes auto allocation + # based test to fail due to ambiguous networks. + self.neutron._ports[self.neutron.sriov_port['id']] = \ + copy.deepcopy(self.neutron.sriov_port) + self.neutron._networks[ + self.neutron.network_2['id']] = self.neutron.network_2 + self.neutron._subnets[ + self.neutron.subnet_2['id']] = self.neutron.subnet_2 + + self.ctxt = context.get_admin_context() + + def _get_compute_node_id_by_host(self, host): + # we specifically need the integer id of the node not the UUID so we + # need to use the old microversion + with utils.temporary_mutation(self.admin_api, microversion='2.52'): + hypers = self.admin_api.api_get( + 'os-hypervisors').body['hypervisors'] + for hyper in hypers: + if hyper['hypervisor_hostname'] == host: + return hyper['id'] + + self.fail('Hypervisor with hostname=%s not found' % host) + + def _assert_pci_device_allocated( + self, instance_uuid, compute_node_id, num=1): + """Assert that a given number of PCI devices are allocated to the + instance on the given host. 
+ """ + + devices = objects.PciDeviceList.get_by_instance_uuid( + self.ctxt, instance_uuid) + devices_on_host = [dev for dev in devices + if dev.compute_node_id == compute_node_id] + self.assertEqual(num, len(devices_on_host)) + + def test_evacuate_races_with_update_available_resource(self): + # Create a server with a direct port to have PCI allocation + server_req = self._build_minimal_create_server_request( + self.api, 'test-server-for-bug-1896463', + image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', + networks=[{'port': self.neutron.sriov_port['id']}]) + server_req['availability_zone'] = 'nova:host1' + created_server = self.api.post_server({'server': server_req}) + server = self._wait_for_state_change( + self.admin_api, created_server, 'ACTIVE') + + self._assert_pci_device_allocated(server['id'], self.compute1_id) + self._assert_pci_device_allocated( + server['id'], self.compute2_id, num=0) + + # stop and force down the compute the instance is on to allow + # evacuation + self.compute1.stop() + self.admin_api.put_service( + self.compute1_service_id, {'forced_down': 'true'}) + + # Inject some sleeps both in the Instance.drop_migration_context and + # the MigrationList.get_in_progress_by_host_and_node code to make them + # overlap. + # We want to create the following execution scenario: + # 1) The evacuation makes a move claim on the dest including the PCI + # claim. This means there is a migration context. But the evacuation + # is not complete yet so the instance.host does not point to the + # dest host. + # 2) The dest resource tracker starts an _update_available_resource() + # periodic task and this task loads the list of instances on its + # host from the DB. Our instance is not in this list due to #1. + # 3) The evacuation finishes, the instance.host is set to the dest host + # and the migration context is deleted. + # 4) The periodic task now loads the list of in-progress migration from + # the DB to check for incoming our outgoing migrations. However due + # to #3 our instance is not in this list either. + # 5) The periodic task cleans up every lingering PCI claim that is not + # connected to any instance collected above from the instance list + # and from the migration list. As our instance is not in either of + # the lists, the resource tracker cleans up the PCI allocation for + # the already finished evacuation of our instance. + # + # Unfortunately we cannot reproduce the above situation without sleeps. + # We need that the evac starts first then the periodic starts, but not + # finishes, then evac finishes, then periodic finishes. If I trigger + # and run the whole periodic in a wrapper of drop_migration_context + # then I could not reproduce the situation described at #4). In general + # it is not + # + # evac + # | + # | + # | periodic + # | | + # | | + # | x + # | + # | + # x + # + # but + # + # evac + # | + # | + # | periodic + # | | + # | | + # | | + # x | + # | + # x + # + # what is needed need. + # + # Starting the periodic from the test in a separate thread at + # drop_migration_context() might work but that is an extra complexity + # in the test code. Also it might need a sleep still to make the + # reproduction stable but only one sleep instead of two. 
+        orig_drop = objects.Instance.drop_migration_context
+
+        def slow_drop(*args, **kwargs):
+            time.sleep(1)
+            return orig_drop(*args, **kwargs)
+
+        self.useFixture(
+            fixtures.MockPatch(
+                'nova.objects.instance.Instance.drop_migration_context',
+                new=slow_drop))
+
+        orig_get_mig = objects.MigrationList.get_in_progress_by_host_and_node
+
+        def slow_get_mig(*args, **kwargs):
+            time.sleep(2)
+            return orig_get_mig(*args, **kwargs)
+
+        self.useFixture(
+            fixtures.MockPatch(
+                'nova.objects.migration.MigrationList.'
+                'get_in_progress_by_host_and_node',
+                side_effect=slow_get_mig))
+
+        self.admin_api.post_server_action(server['id'], {'evacuate': {}})
+        # we trigger the _update_available_resource periodic to overlap with
+        # the already started evacuation
+        ctx = context.get_admin_context()
+        self.compute1.manager.update_available_resource(ctx)
+        self.compute2.manager.update_available_resource(ctx)
+
+        self._wait_for_server_parameter(
+            self.admin_api,
+            server,
+            {'OS-EXT-SRV-ATTR:host': 'host2', 'status': 'ACTIVE'}
+        )
+
+        self._assert_pci_device_allocated(server['id'], self.compute1_id)
+
+        # This is bug #1896463 as the PCI allocation was deleted by the racing
+        # _update_available_resource periodic task.
+        self._assert_pci_device_allocated(
+            server['id'], self.compute2_id, num=0)
+
+        # FIXME(gibi): When this bug is fixed (or if you remove the sleeps
+        # above to avoid the race condition) then we expect that the PCI
+        # allocation exists on the destination host too.
+        # self._assert_pci_device_allocated(server['id'], self.compute2_id)