diff --git a/nova/tests/functional/libvirt/test_numa_live_migration.py b/nova/tests/functional/libvirt/test_numa_live_migration.py index c5501734ee8a..69e3d71b05c5 100644 --- a/nova/tests/functional/libvirt/test_numa_live_migration.py +++ b/nova/tests/functional/libvirt/test_numa_live_migration.py @@ -19,6 +19,7 @@ from oslo_config import cfg from oslo_log import log as logging from nova.compute import manager as compute_manager +from nova.compute import resource_tracker as rt from nova import context from nova import objects from nova import test @@ -207,7 +208,7 @@ class NUMALiveMigrationPositiveTests(NUMALiveMigrationPositiveBase): dom.complete_job() self.migrate_stub_ran = True - def _test(self, pin_dest=False): + def _test(self, pin_dest): """Live migrate the server on host_a to host_b. """ # Make sure instances initially land on "overlapping" CPUs on both @@ -254,7 +255,7 @@ class NUMALiveMigrationPositiveTests(NUMALiveMigrationPositiveBase): # direction here. def test_numa_live_migration(self): - self._test() + self._test(pin_dest=False) def test_numa_live_migration_dest_pinned(self): self._test(pin_dest=True) @@ -298,7 +299,7 @@ class NUMALiveMigrationPositiveTests(NUMALiveMigrationPositiveBase): self.useFixture(fixtures.MonkeyPatch( 'nova.compute.manager.ComputeManager.live_migration', live_migration)) - self._test() + self._test(pin_dest=False) self.assertTrue(self.live_migration_ran) @@ -358,6 +359,9 @@ class NUMALiveMigrationRollbackTests(NUMALiveMigrationPositiveBase): self.assertEqual('host_a', self.get_host(self.server_a['id'])) self.assertIsNone(self._get_migration_context(self.server_a['id'])) + def _test_rollback(self, pin_dest=False): + self._test(pin_dest) + # Check consumed and pinned CPUs. Things should be as they were before # the live migration, with CPUs 0,1 consumed on both hosts by the 2 # servers. @@ -367,10 +371,77 @@ class NUMALiveMigrationRollbackTests(NUMALiveMigrationPositiveBase): [0, 1], [0, 1]) def test_rollback(self): - self._test() + self._test_rollback() def test_rollback_pinned_dest(self): - self._test(pin_dest=True) + self._test_rollback(pin_dest=True) + + def _test_bug_1894095(self, pre_drop_race=False, post_drop_race=False): + """Reproducer for bug #1894095 under live migration. + + Demonstrate the possibility of races caused by running the resource + tracker's periodic task between marking a migration as failed and + dropping the claim for that migration on the destination host. + """ + + orig_drop_move_claim = rt.ResourceTracker.drop_move_claim + + def drop_move_claim(*args, **kwargs): + """Run periodics after marking the migration confirmed, simulating + a race between the doing this and actually dropping the claim. + """ + + # check the usage, which should show usage on both hosts: server_a + # on host_a, and server_b plus server_a's migration on host_b + self._assert_host_consumed_cpus('host_a', [0, 1]) + self._assert_host_consumed_cpus('host_b', [0, 1, 2, 3]) + + if pre_drop_race: + self._run_periodics() + # FIXME(stephenfin): This is picking up server_a's "destination + # CPUs", intended for host_b, on host_a + self._assert_host_consumed_cpus('host_a', [2, 3]) + self._assert_host_consumed_cpus('host_b', [0, 1, 2, 3]) + # self._assert_host_consumed_cpus('host_a', [0, 1]) + # self._assert_host_consumed_cpus('host_b', [0, 1, 2, 3]) + + result = orig_drop_move_claim(*args, **kwargs) + + if pre_drop_race: + # FIXME(stephenfin): host_a's pinning information is still + # incorrect, which is expected since dropping the move claim + # makes no changes to the source host + self._assert_host_consumed_cpus('host_a', [2, 3]) + else: + self._assert_host_consumed_cpus('host_a', [0, 1]) + self._assert_host_consumed_cpus('host_b', [0, 1]) + + if post_drop_race: + self._run_periodics() + # FIXME(stephenfin): host_a is using the wrong pinned CPUs and + # host_b has regained its previously dropped allocation + self._assert_host_consumed_cpus('host_a', [2, 3]) + self._assert_host_consumed_cpus('host_b', [0, 1, 2, 3]) + # self._assert_host_consumed_cpus('host_a', [0, 1]) + # self._assert_host_consumed_cpus('host_b', [0, 1]) + + return result + + self.useFixture(fixtures.MonkeyPatch( + 'nova.compute.resource_tracker.ResourceTracker.drop_move_claim', + drop_move_claim)) + + self._test() + + self._run_periodics() + self._assert_host_consumed_cpus('host_a', [0, 1]) + self._assert_host_consumed_cpus('host_b', [0, 1]) + + def test_bug_1894095_pre_drop(self): + self._test_bug_1894095(pre_drop_race=True) + + def test_bug_1894095_post_drop(self): + self._test_bug_1894095(post_drop_race=True) class NUMALiveMigrationLegacyBase(NUMALiveMigrationPositiveBase):