Expand generic reproducer for bug #1879878
This shows that this bug doesn't affect cross-cell resize in the same way, but does highlight some gaps we need to close. Change-Id: Iee2c5521f41a2d061d21d4556cfbf7a898ad96c9 Signed-off-by: Stephen Finucane <stephenfin@redhat.com> Related-Bug: #1879878
This commit is contained in:
parent
b75e689864
commit
27b37ed5c8
|
@ -421,12 +421,14 @@ class InstanceHelperMixin:
|
|||
fake_notifier.wait_for_versioned_notifications('instance.resize.end')
|
||||
return self._wait_for_state_change(server, 'VERIFY_RESIZE')
|
||||
|
||||
def _confirm_resize(self, server):
|
||||
def _confirm_resize(self, server, *, cross_cell=False):
|
||||
self.api.post_server_action(server['id'], {'confirmResize': None})
|
||||
server = self._wait_for_state_change(server, 'ACTIVE')
|
||||
event = 'compute_confirm_resize'
|
||||
if cross_cell:
|
||||
event = 'conductor_confirm_snapshot_based_resize'
|
||||
self._wait_for_instance_action_event(
|
||||
server, instance_actions.CONFIRM_RESIZE,
|
||||
'compute_confirm_resize', 'success')
|
||||
server, instance_actions.CONFIRM_RESIZE, event, 'success')
|
||||
return server
|
||||
|
||||
def _revert_resize(self, server):
|
||||
|
|
|
@ -11,16 +11,21 @@
|
|||
# under the License.
|
||||
|
||||
import ddt
|
||||
import mock
|
||||
|
||||
from nova.compute import resource_tracker as rt
|
||||
from nova import context as nova_context
|
||||
from nova import objects
|
||||
from nova.policies import base as base_policies
|
||||
from nova.policies import servers as servers_policies
|
||||
from nova import test
|
||||
from nova.tests.functional import integrated_helpers
|
||||
from nova.virt import fake as fake_driver
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
class TestColdMigrationUsage(integrated_helpers._IntegratedTestBase):
|
||||
"""Reproducer for bug #1879878.
|
||||
class TestSameCell(integrated_helpers._IntegratedTestBase):
|
||||
"""Reproducer for bug #1879878 (same-cell).
|
||||
|
||||
Demonstrate the possibility of races caused by running the resource
|
||||
tracker's periodic task between marking a migration as confirmed or
|
||||
|
@ -170,3 +175,192 @@ class TestColdMigrationUsage(integrated_helpers._IntegratedTestBase):
|
|||
|
||||
self.assertUsage(src_host, 1)
|
||||
self.assertUsage(dst_host, 0)
|
||||
|
||||
|
||||
# TODO(stephenfin): This should inherit from _IntegratedTestBase
|
||||
@ddt.ddt
|
||||
class TestCrossCell(integrated_helpers.ProviderUsageBaseTestCase):
|
||||
"""Reproducer for bug #1879878 (cross-cell).
|
||||
|
||||
Demonstrate the possibility of races caused by running the resource
|
||||
tracker's periodic task between marking a migration as confirmed or
|
||||
reverted and dropping the claim for that migration on the source or
|
||||
destination host, respectively.
|
||||
"""
|
||||
NUMBER_OF_CELLS = 2
|
||||
compute_driver = 'fake.MediumFakeDriver'
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
|
||||
# adjust the polling interval and timeout for long RPC calls to
|
||||
# something smaller and saner
|
||||
self.flags(rpc_response_timeout=1, long_rpc_timeout=60)
|
||||
|
||||
# enable cross-cell resize policy since it defaults to not allow anyone
|
||||
# to perform that type of operation. For these tests we'll just allow
|
||||
# admins to perform cross-cell resize.
|
||||
self.policy.set_rules(
|
||||
{servers_policies.CROSS_CELL_RESIZE: base_policies.RULE_ADMIN_API},
|
||||
overwrite=False)
|
||||
|
||||
# start two compute services in two different cells so we can migrate
|
||||
# between them.
|
||||
self.cell_to_aggregate = {}
|
||||
self.host_to_cell_mappings = {
|
||||
'host1': 'cell1', 'host2': 'cell2',
|
||||
}
|
||||
for host_name, cell_name in self.host_to_cell_mappings.items():
|
||||
self._start_compute(host_name, cell_name=cell_name)
|
||||
# create an aggregate where the AZ name is the cell name.
|
||||
agg_id = self._create_aggregate(
|
||||
cell_name, availability_zone=cell_name)
|
||||
# add the host to the aggregate.
|
||||
self.admin_api.post_aggregate_action(
|
||||
agg_id, {'add_host': {'host': host_name}})
|
||||
self.cell_to_aggregate[cell_name] = agg_id
|
||||
|
||||
self.ctxt = nova_context.get_admin_context()
|
||||
|
||||
def _create_server(self):
|
||||
"""Creates and return a server along with a source host and target
|
||||
host.
|
||||
"""
|
||||
server = super()._create_server(networks='none')
|
||||
self.addCleanup(self._delete_server, server)
|
||||
source_host = server['OS-EXT-SRV-ATTR:host']
|
||||
target_host = 'host2' if source_host == 'host1' else 'host1'
|
||||
return server, source_host, target_host
|
||||
|
||||
def assertUsage(self, hostname, usage):
|
||||
"""Assert VCPU usage for the given host."""
|
||||
target_cell_name = self.host_to_cell_mappings[hostname]
|
||||
target_cell = self.cell_mappings[target_cell_name]
|
||||
with nova_context.target_cell(self.ctxt, target_cell) as cctxt:
|
||||
cn = objects.ComputeNode.get_by_nodename(cctxt, hostname)
|
||||
# we could test anything, but vcpu is easy to grok
|
||||
self.assertEqual(cn.vcpus_used, usage)
|
||||
|
||||
@ddt.data(True, False)
|
||||
def test_migrate_confirm(self, drop_race):
|
||||
server, src_host, dst_host = self._create_server()
|
||||
|
||||
# only one instance created, so usage on its host
|
||||
self.assertUsage(src_host, 1)
|
||||
self.assertUsage(dst_host, 0)
|
||||
|
||||
orig_drop_claim = rt.ResourceTracker.drop_move_claim
|
||||
orig_cleanup = fake_driver.FakeDriver.cleanup
|
||||
|
||||
def fake_drop_move_claim(*args, **kwargs):
|
||||
# run periodics after marking the migration confirmed, simulating a
|
||||
# race between the doing this and actually dropping the claim
|
||||
|
||||
# check the usage, which should show usage on both hosts
|
||||
self.assertUsage(src_host, 1)
|
||||
self.assertUsage(dst_host, 1)
|
||||
|
||||
if drop_race:
|
||||
self._run_periodics()
|
||||
|
||||
self.assertUsage(src_host, 1)
|
||||
self.assertUsage(dst_host, 1)
|
||||
|
||||
return orig_drop_claim(*args, **kwargs)
|
||||
|
||||
def fake_cleanup(_self, _context, _instance, *args, **kwargs):
|
||||
self.assertIsNotNone(_instance.old_flavor)
|
||||
# FIXME(stephenfin): This should not be unset
|
||||
self.assertIsNone(_instance.new_flavor)
|
||||
return orig_cleanup(_self, _context, _instance, *args, **kwargs)
|
||||
|
||||
# TODO(stephenfin): Use a helper
|
||||
self.api.post_server_action(server['id'], {'migrate': None})
|
||||
self._wait_for_state_change(server, 'VERIFY_RESIZE')
|
||||
|
||||
# migration isn't complete so we should have usage on both hosts
|
||||
self.assertUsage(src_host, 1)
|
||||
self.assertUsage(dst_host, 1)
|
||||
|
||||
with test.nested(
|
||||
mock.patch(
|
||||
'nova.compute.resource_tracker.ResourceTracker'
|
||||
'.drop_move_claim',
|
||||
new=fake_drop_move_claim),
|
||||
mock.patch('nova.virt.fake.FakeDriver.cleanup', new=fake_cleanup),
|
||||
):
|
||||
self._confirm_resize(server, cross_cell=True)
|
||||
|
||||
# migration is now confirmed so we should once again only have usage on
|
||||
# one host
|
||||
self.assertUsage(src_host, 0)
|
||||
self.assertUsage(dst_host, 1)
|
||||
|
||||
# running periodics shouldn't change things
|
||||
self._run_periodics()
|
||||
|
||||
self.assertUsage(src_host, 0)
|
||||
self.assertUsage(dst_host, 1)
|
||||
|
||||
@ddt.data(True, False)
|
||||
def test_migrate_revert(self, drop_race):
|
||||
server, src_host, dst_host = self._create_server()
|
||||
|
||||
# only one instance created, so usage on its host
|
||||
self.assertUsage(src_host, 1)
|
||||
self.assertUsage(dst_host, 0)
|
||||
|
||||
orig_drop_claim = rt.ResourceTracker.drop_move_claim
|
||||
orig_finish_revert = fake_driver.FakeDriver.finish_revert_migration
|
||||
|
||||
def fake_drop_move_claim(*args, **kwargs):
|
||||
# run periodics after marking the migration reverted, simulating a
|
||||
# race between the doing this and actually dropping the claim
|
||||
|
||||
# check the usage, which should show usage on both hosts
|
||||
self.assertUsage(src_host, 1)
|
||||
self.assertUsage(dst_host, 1)
|
||||
|
||||
if drop_race:
|
||||
self._run_periodics()
|
||||
|
||||
self.assertUsage(src_host, 1)
|
||||
self.assertUsage(dst_host, 1)
|
||||
|
||||
return orig_drop_claim(*args, **kwargs)
|
||||
|
||||
def fake_finish_revert_migration(_self, _context, _instance, *a, **kw):
|
||||
self.assertIsNotNone(_instance.old_flavor)
|
||||
# FIXME(stephenfin): This should not be unset
|
||||
self.assertIsNone(_instance.new_flavor)
|
||||
return orig_finish_revert(_self, _context, _instance, *a, **kw)
|
||||
|
||||
# TODO(stephenfin): Use a helper
|
||||
self.api.post_server_action(server['id'], {'migrate': None})
|
||||
self._wait_for_state_change(server, 'VERIFY_RESIZE')
|
||||
|
||||
# migration isn't complete so we should have usage on both hosts
|
||||
self.assertUsage(src_host, 1)
|
||||
self.assertUsage(dst_host, 1)
|
||||
|
||||
with test.nested(
|
||||
mock.patch(
|
||||
'nova.compute.resource_tracker.ResourceTracker'
|
||||
'.drop_move_claim',
|
||||
new=fake_drop_move_claim),
|
||||
mock.patch(
|
||||
'nova.virt.fake.FakeDriver.finish_revert_migration',
|
||||
new=fake_finish_revert_migration),
|
||||
):
|
||||
self._revert_resize(server)
|
||||
|
||||
# migration is now reverted so we should once again only have usage on
|
||||
# one host
|
||||
self.assertUsage(src_host, 1)
|
||||
self.assertUsage(dst_host, 0)
|
||||
|
||||
# running periodics shouldn't change things
|
||||
self._run_periodics()
|
||||
|
||||
self.assertUsage(src_host, 1)
|
||||
self.assertUsage(dst_host, 0)
|
||||
|
|
Loading…
Reference in New Issue