Add reproducer for bug #1894095
As with the resize/cold migration confirm/resize flow, the rolling back of live migrations can be racy due to us doing various things without the benefit of locks. Add a reproducer demonstrating this. Change-Id: I276d2bbce4a0390a6cfd597bb51481cc53eca918 Signed-off-by: Stephen Finucane <stephenfin@redhat.com> Related-Bug: #1894095 Related-Bug: #1879878
This commit is contained in:
parent
d01972b272
commit
99850a0481
|
@ -19,6 +19,7 @@ from oslo_config import cfg
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
|
|
||||||
from nova.compute import manager as compute_manager
|
from nova.compute import manager as compute_manager
|
||||||
|
from nova.compute import resource_tracker as rt
|
||||||
from nova import context
|
from nova import context
|
||||||
from nova import objects
|
from nova import objects
|
||||||
from nova import test
|
from nova import test
|
||||||
|
@ -207,7 +208,7 @@ class NUMALiveMigrationPositiveTests(NUMALiveMigrationPositiveBase):
|
||||||
dom.complete_job()
|
dom.complete_job()
|
||||||
self.migrate_stub_ran = True
|
self.migrate_stub_ran = True
|
||||||
|
|
||||||
def _test(self, pin_dest=False):
|
def _test(self, pin_dest):
|
||||||
"""Live migrate the server on host_a to host_b.
|
"""Live migrate the server on host_a to host_b.
|
||||||
"""
|
"""
|
||||||
# Make sure instances initially land on "overlapping" CPUs on both
|
# Make sure instances initially land on "overlapping" CPUs on both
|
||||||
|
@ -254,7 +255,7 @@ class NUMALiveMigrationPositiveTests(NUMALiveMigrationPositiveBase):
|
||||||
# direction here.
|
# direction here.
|
||||||
|
|
||||||
def test_numa_live_migration(self):
|
def test_numa_live_migration(self):
|
||||||
self._test()
|
self._test(pin_dest=False)
|
||||||
|
|
||||||
def test_numa_live_migration_dest_pinned(self):
|
def test_numa_live_migration_dest_pinned(self):
|
||||||
self._test(pin_dest=True)
|
self._test(pin_dest=True)
|
||||||
|
@ -298,7 +299,7 @@ class NUMALiveMigrationPositiveTests(NUMALiveMigrationPositiveBase):
|
||||||
self.useFixture(fixtures.MonkeyPatch(
|
self.useFixture(fixtures.MonkeyPatch(
|
||||||
'nova.compute.manager.ComputeManager.live_migration',
|
'nova.compute.manager.ComputeManager.live_migration',
|
||||||
live_migration))
|
live_migration))
|
||||||
self._test()
|
self._test(pin_dest=False)
|
||||||
self.assertTrue(self.live_migration_ran)
|
self.assertTrue(self.live_migration_ran)
|
||||||
|
|
||||||
|
|
||||||
|
@ -358,6 +359,9 @@ class NUMALiveMigrationRollbackTests(NUMALiveMigrationPositiveBase):
|
||||||
self.assertEqual('host_a', self.get_host(self.server_a['id']))
|
self.assertEqual('host_a', self.get_host(self.server_a['id']))
|
||||||
self.assertIsNone(self._get_migration_context(self.server_a['id']))
|
self.assertIsNone(self._get_migration_context(self.server_a['id']))
|
||||||
|
|
||||||
|
def _test_rollback(self, pin_dest=False):
|
||||||
|
self._test(pin_dest)
|
||||||
|
|
||||||
# Check consumed and pinned CPUs. Things should be as they were before
|
# Check consumed and pinned CPUs. Things should be as they were before
|
||||||
# the live migration, with CPUs 0,1 consumed on both hosts by the 2
|
# the live migration, with CPUs 0,1 consumed on both hosts by the 2
|
||||||
# servers.
|
# servers.
|
||||||
|
@ -367,10 +371,77 @@ class NUMALiveMigrationRollbackTests(NUMALiveMigrationPositiveBase):
|
||||||
[0, 1], [0, 1])
|
[0, 1], [0, 1])
|
||||||
|
|
||||||
def test_rollback(self):
|
def test_rollback(self):
|
||||||
self._test()
|
self._test_rollback()
|
||||||
|
|
||||||
def test_rollback_pinned_dest(self):
|
def test_rollback_pinned_dest(self):
|
||||||
self._test(pin_dest=True)
|
self._test_rollback(pin_dest=True)
|
||||||
|
|
||||||
|
def _test_bug_1894095(self, pre_drop_race=False, post_drop_race=False):
|
||||||
|
"""Reproducer for bug #1894095 under live migration.
|
||||||
|
|
||||||
|
Demonstrate the possibility of races caused by running the resource
|
||||||
|
tracker's periodic task between marking a migration as failed and
|
||||||
|
dropping the claim for that migration on the destination host.
|
||||||
|
"""
|
||||||
|
|
||||||
|
orig_drop_move_claim = rt.ResourceTracker.drop_move_claim
|
||||||
|
|
||||||
|
def drop_move_claim(*args, **kwargs):
|
||||||
|
"""Run periodics after marking the migration confirmed, simulating
|
||||||
|
a race between the doing this and actually dropping the claim.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# check the usage, which should show usage on both hosts: server_a
|
||||||
|
# on host_a, and server_b plus server_a's migration on host_b
|
||||||
|
self._assert_host_consumed_cpus('host_a', [0, 1])
|
||||||
|
self._assert_host_consumed_cpus('host_b', [0, 1, 2, 3])
|
||||||
|
|
||||||
|
if pre_drop_race:
|
||||||
|
self._run_periodics()
|
||||||
|
# FIXME(stephenfin): This is picking up server_a's "destination
|
||||||
|
# CPUs", intended for host_b, on host_a
|
||||||
|
self._assert_host_consumed_cpus('host_a', [2, 3])
|
||||||
|
self._assert_host_consumed_cpus('host_b', [0, 1, 2, 3])
|
||||||
|
# self._assert_host_consumed_cpus('host_a', [0, 1])
|
||||||
|
# self._assert_host_consumed_cpus('host_b', [0, 1, 2, 3])
|
||||||
|
|
||||||
|
result = orig_drop_move_claim(*args, **kwargs)
|
||||||
|
|
||||||
|
if pre_drop_race:
|
||||||
|
# FIXME(stephenfin): host_a's pinning information is still
|
||||||
|
# incorrect, which is expected since dropping the move claim
|
||||||
|
# makes no changes to the source host
|
||||||
|
self._assert_host_consumed_cpus('host_a', [2, 3])
|
||||||
|
else:
|
||||||
|
self._assert_host_consumed_cpus('host_a', [0, 1])
|
||||||
|
self._assert_host_consumed_cpus('host_b', [0, 1])
|
||||||
|
|
||||||
|
if post_drop_race:
|
||||||
|
self._run_periodics()
|
||||||
|
# FIXME(stephenfin): host_a is using the wrong pinned CPUs and
|
||||||
|
# host_b has regained its previously dropped allocation
|
||||||
|
self._assert_host_consumed_cpus('host_a', [2, 3])
|
||||||
|
self._assert_host_consumed_cpus('host_b', [0, 1, 2, 3])
|
||||||
|
# self._assert_host_consumed_cpus('host_a', [0, 1])
|
||||||
|
# self._assert_host_consumed_cpus('host_b', [0, 1])
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
self.useFixture(fixtures.MonkeyPatch(
|
||||||
|
'nova.compute.resource_tracker.ResourceTracker.drop_move_claim',
|
||||||
|
drop_move_claim))
|
||||||
|
|
||||||
|
self._test()
|
||||||
|
|
||||||
|
self._run_periodics()
|
||||||
|
self._assert_host_consumed_cpus('host_a', [0, 1])
|
||||||
|
self._assert_host_consumed_cpus('host_b', [0, 1])
|
||||||
|
|
||||||
|
def test_bug_1894095_pre_drop(self):
|
||||||
|
self._test_bug_1894095(pre_drop_race=True)
|
||||||
|
|
||||||
|
def test_bug_1894095_post_drop(self):
|
||||||
|
self._test_bug_1894095(post_drop_race=True)
|
||||||
|
|
||||||
|
|
||||||
class NUMALiveMigrationLegacyBase(NUMALiveMigrationPositiveBase):
|
class NUMALiveMigrationLegacyBase(NUMALiveMigrationPositiveBase):
|
||||||
|
|
Loading…
Reference in New Issue