Expand generic reproducer for bug #1879878

This shows that this bug doesn't affect cross-cell resize in the same
way, but does highlight some gaps we need to close.

Change-Id: Iee2c5521f41a2d061d21d4556cfbf7a898ad96c9
Signed-off-by: Stephen Finucane <stephenfin@redhat.com>
Related-Bug: #1879878
This commit is contained in:
Stephen Finucane 2020-09-08 09:54:38 +01:00
parent b75e689864
commit 27b37ed5c8
2 changed files with 201 additions and 5 deletions

View File

@ -421,12 +421,14 @@ class InstanceHelperMixin:
fake_notifier.wait_for_versioned_notifications('instance.resize.end')
return self._wait_for_state_change(server, 'VERIFY_RESIZE')
def _confirm_resize(self, server):
def _confirm_resize(self, server, *, cross_cell=False):
self.api.post_server_action(server['id'], {'confirmResize': None})
server = self._wait_for_state_change(server, 'ACTIVE')
event = 'compute_confirm_resize'
if cross_cell:
event = 'conductor_confirm_snapshot_based_resize'
self._wait_for_instance_action_event(
server, instance_actions.CONFIRM_RESIZE,
'compute_confirm_resize', 'success')
server, instance_actions.CONFIRM_RESIZE, event, 'success')
return server
def _revert_resize(self, server):

View File

@ -11,16 +11,21 @@
# under the License.
import ddt
import mock
from nova.compute import resource_tracker as rt
from nova import context as nova_context
from nova import objects
from nova.policies import base as base_policies
from nova.policies import servers as servers_policies
from nova import test
from nova.tests.functional import integrated_helpers
from nova.virt import fake as fake_driver
@ddt.ddt
class TestColdMigrationUsage(integrated_helpers._IntegratedTestBase):
"""Reproducer for bug #1879878.
class TestSameCell(integrated_helpers._IntegratedTestBase):
"""Reproducer for bug #1879878 (same-cell).
Demonstrate the possibility of races caused by running the resource
tracker's periodic task between marking a migration as confirmed or
@ -170,3 +175,192 @@ class TestColdMigrationUsage(integrated_helpers._IntegratedTestBase):
self.assertUsage(src_host, 1)
self.assertUsage(dst_host, 0)
# TODO(stephenfin): This should inherit from _IntegratedTestBase
@ddt.ddt
class TestCrossCell(integrated_helpers.ProviderUsageBaseTestCase):
"""Reproducer for bug #1879878 (cross-cell).
Demonstrate the possibility of races caused by running the resource
tracker's periodic task between marking a migration as confirmed or
reverted and dropping the claim for that migration on the source or
destination host, respectively.
"""
NUMBER_OF_CELLS = 2
compute_driver = 'fake.MediumFakeDriver'
def setUp(self):
super().setUp()
# adjust the polling interval and timeout for long RPC calls to
# something smaller and saner
self.flags(rpc_response_timeout=1, long_rpc_timeout=60)
# enable cross-cell resize policy since it defaults to not allow anyone
# to perform that type of operation. For these tests we'll just allow
# admins to perform cross-cell resize.
self.policy.set_rules(
{servers_policies.CROSS_CELL_RESIZE: base_policies.RULE_ADMIN_API},
overwrite=False)
# start two compute services in two different cells so we can migrate
# between them.
self.cell_to_aggregate = {}
self.host_to_cell_mappings = {
'host1': 'cell1', 'host2': 'cell2',
}
for host_name, cell_name in self.host_to_cell_mappings.items():
self._start_compute(host_name, cell_name=cell_name)
# create an aggregate where the AZ name is the cell name.
agg_id = self._create_aggregate(
cell_name, availability_zone=cell_name)
# add the host to the aggregate.
self.admin_api.post_aggregate_action(
agg_id, {'add_host': {'host': host_name}})
self.cell_to_aggregate[cell_name] = agg_id
self.ctxt = nova_context.get_admin_context()
def _create_server(self):
"""Creates and return a server along with a source host and target
host.
"""
server = super()._create_server(networks='none')
self.addCleanup(self._delete_server, server)
source_host = server['OS-EXT-SRV-ATTR:host']
target_host = 'host2' if source_host == 'host1' else 'host1'
return server, source_host, target_host
def assertUsage(self, hostname, usage):
"""Assert VCPU usage for the given host."""
target_cell_name = self.host_to_cell_mappings[hostname]
target_cell = self.cell_mappings[target_cell_name]
with nova_context.target_cell(self.ctxt, target_cell) as cctxt:
cn = objects.ComputeNode.get_by_nodename(cctxt, hostname)
# we could test anything, but vcpu is easy to grok
self.assertEqual(cn.vcpus_used, usage)
@ddt.data(True, False)
def test_migrate_confirm(self, drop_race):
server, src_host, dst_host = self._create_server()
# only one instance created, so usage on its host
self.assertUsage(src_host, 1)
self.assertUsage(dst_host, 0)
orig_drop_claim = rt.ResourceTracker.drop_move_claim
orig_cleanup = fake_driver.FakeDriver.cleanup
def fake_drop_move_claim(*args, **kwargs):
# run periodics after marking the migration confirmed, simulating a
# race between the doing this and actually dropping the claim
# check the usage, which should show usage on both hosts
self.assertUsage(src_host, 1)
self.assertUsage(dst_host, 1)
if drop_race:
self._run_periodics()
self.assertUsage(src_host, 1)
self.assertUsage(dst_host, 1)
return orig_drop_claim(*args, **kwargs)
def fake_cleanup(_self, _context, _instance, *args, **kwargs):
self.assertIsNotNone(_instance.old_flavor)
# FIXME(stephenfin): This should not be unset
self.assertIsNone(_instance.new_flavor)
return orig_cleanup(_self, _context, _instance, *args, **kwargs)
# TODO(stephenfin): Use a helper
self.api.post_server_action(server['id'], {'migrate': None})
self._wait_for_state_change(server, 'VERIFY_RESIZE')
# migration isn't complete so we should have usage on both hosts
self.assertUsage(src_host, 1)
self.assertUsage(dst_host, 1)
with test.nested(
mock.patch(
'nova.compute.resource_tracker.ResourceTracker'
'.drop_move_claim',
new=fake_drop_move_claim),
mock.patch('nova.virt.fake.FakeDriver.cleanup', new=fake_cleanup),
):
self._confirm_resize(server, cross_cell=True)
# migration is now confirmed so we should once again only have usage on
# one host
self.assertUsage(src_host, 0)
self.assertUsage(dst_host, 1)
# running periodics shouldn't change things
self._run_periodics()
self.assertUsage(src_host, 0)
self.assertUsage(dst_host, 1)
@ddt.data(True, False)
def test_migrate_revert(self, drop_race):
server, src_host, dst_host = self._create_server()
# only one instance created, so usage on its host
self.assertUsage(src_host, 1)
self.assertUsage(dst_host, 0)
orig_drop_claim = rt.ResourceTracker.drop_move_claim
orig_finish_revert = fake_driver.FakeDriver.finish_revert_migration
def fake_drop_move_claim(*args, **kwargs):
# run periodics after marking the migration reverted, simulating a
# race between the doing this and actually dropping the claim
# check the usage, which should show usage on both hosts
self.assertUsage(src_host, 1)
self.assertUsage(dst_host, 1)
if drop_race:
self._run_periodics()
self.assertUsage(src_host, 1)
self.assertUsage(dst_host, 1)
return orig_drop_claim(*args, **kwargs)
def fake_finish_revert_migration(_self, _context, _instance, *a, **kw):
self.assertIsNotNone(_instance.old_flavor)
# FIXME(stephenfin): This should not be unset
self.assertIsNone(_instance.new_flavor)
return orig_finish_revert(_self, _context, _instance, *a, **kw)
# TODO(stephenfin): Use a helper
self.api.post_server_action(server['id'], {'migrate': None})
self._wait_for_state_change(server, 'VERIFY_RESIZE')
# migration isn't complete so we should have usage on both hosts
self.assertUsage(src_host, 1)
self.assertUsage(dst_host, 1)
with test.nested(
mock.patch(
'nova.compute.resource_tracker.ResourceTracker'
'.drop_move_claim',
new=fake_drop_move_claim),
mock.patch(
'nova.virt.fake.FakeDriver.finish_revert_migration',
new=fake_finish_revert_migration),
):
self._revert_resize(server)
# migration is now reverted so we should once again only have usage on
# one host
self.assertUsage(src_host, 1)
self.assertUsage(dst_host, 0)
# running periodics shouldn't change things
self._run_periodics()
self.assertUsage(src_host, 1)
self.assertUsage(dst_host, 0)