post live migration: don't call Neutron needlessly
In bug 1879787, the call to network_api.get_instance_nw_info() in
_post_live_migration() on the source compute manager eventually calls
out to the Neutron REST API. If this fails, the exception is
unhandled, and the migrating instance - which is fully running on the
destination at this point - will never be updated in the database.
This update normally happens later in
post_live_migration_at_destination().
The network_info variable obtained from get_instance_nw_info() is used
for two things: notifications - which aren't critical - and unplugging
the instance's vifs on the source - which is very important!
It turns out that at the time of the get_instance_nw_info() call, the
network info in the instance info cache is still valid for unplugging
the source vifs. The port bindings on the destination are only
activated by the network_api.migrate_instance_start() [1] call that
happens shortly *after* the problematic get_instance_nw_info() call.
In other words, get_instance_nw_info() will always return the source
ports. Because of that, we can replace it with a call to
instance.get_network_info().
[1] d9e04c4ff0/nova/network/neutronv2/api.py (L2493-L2522)
Change-Id: If0fbae33ce2af198188c91638afef939256c2556
Closes-bug: 1879787
This commit is contained in:
parent
64b4271279
commit
6488a5dfb2
@ -8382,7 +8382,13 @@ class ComputeManager(manager.Manager):
|
||||
# Releasing vlan.
|
||||
# (not necessary in current implementation?)
|
||||
|
||||
network_info = self.network_api.get_instance_nw_info(ctxt, instance)
|
||||
# NOTE(artom) At this point in time we have not bound the ports to the
|
||||
# destination host yet (this happens in migrate_instance_start()
|
||||
# below). Therefore, the "old" source network info that's still in the
|
||||
# instance info cache is safe to use here, since it'll be used below
|
||||
# during driver.post_live_migration_at_source() to unplug the VIFs on
|
||||
# the source.
|
||||
network_info = instance.get_network_info()
|
||||
|
||||
self._notify_about_instance_usage(ctxt, instance,
|
||||
"live_migration._post.start",
|
||||
|
@ -177,7 +177,8 @@ class TestVolAttachmentsDuringLiveMigration(
|
||||
self.assertEqual(1, len(attachments))
|
||||
|
||||
|
||||
class LiveMigrationNeutronFailure(integrated_helpers._IntegratedTestBase):
|
||||
class LiveMigrationNeutronInteractionsTest(
|
||||
integrated_helpers._IntegratedTestBase):
|
||||
# NOTE(artom) We need the admin API to force the host when booting the test
|
||||
# server.
|
||||
ADMIN_API = True
|
||||
@ -187,38 +188,29 @@ class LiveMigrationNeutronFailure(integrated_helpers._IntegratedTestBase):
|
||||
self._start_compute('src')
|
||||
self._start_compute('dest')
|
||||
|
||||
def test_live_migrate_get_nw_info_fails(self):
|
||||
"""Test that if the driver.post_live_migration() call fails (for
|
||||
example by not being able to connect to Neutron), the exception goes
|
||||
unhandled and results in the live-migration erroring out. This is bug
|
||||
1879787.
|
||||
def test_live_migrate_vifs_from_info_cache(self):
|
||||
"""Test that bug 1879787 can no longer manifest itself because we get
|
||||
the network_info from the instance info cache, and not Neutron.
|
||||
"""
|
||||
def stub_notify(context, instance, event_suffix,
|
||||
network_info=None, extra_usage_info=None, fault=None):
|
||||
vif = network_info[0]
|
||||
# Make sure we have the correct VIF (the NeutronFixture
|
||||
# deterministically uses port_2 for networks=auto) and that the
|
||||
# profile does not contain `migrating_to`, indicating that we did
|
||||
# not obtain it from the Neutron API.
|
||||
self.assertEqual(self.neutron.port_2['id'], vif['id'])
|
||||
self.assertNotIn('migrating_to', vif['profile'])
|
||||
|
||||
server = self._create_server(networks='auto',
|
||||
host=self.computes['src'].host)
|
||||
|
||||
orig_plm = self.computes['src'].manager._post_live_migration
|
||||
|
||||
def stub_plm(*args, **kwargs):
|
||||
"""We simulate a failure in driver.post_live_migration() on the
|
||||
source by stubbing the source compute's _post_live_migration() with
|
||||
a method that, within a context that mocks
|
||||
driver.post_live_migration() to raise an exception, calls the
|
||||
original compute.manager._post_live_migration(). This is needed to
|
||||
make sure driver.post_live_migration() raises only once, and only
|
||||
on the source.
|
||||
"""
|
||||
with mock.patch.object(self.computes['src'].manager.network_api,
|
||||
'get_instance_nw_info',
|
||||
side_effect=ConnectionError):
|
||||
return orig_plm(*args, **kwargs)
|
||||
|
||||
with mock.patch.object(self.computes['src'].manager,
|
||||
'_post_live_migration',
|
||||
side_effect=stub_plm):
|
||||
# FIXME(artom) Until bug 1879787 is fixed, the raised
|
||||
# ConnectionError will go unhandled, the migration will fail, and
|
||||
# the instance will still be reported as being on the source, even
|
||||
# though it's actually running on the destination.
|
||||
self._live_migrate(server, 'error', server_expected_state='ERROR')
|
||||
'_notify_about_instance_usage',
|
||||
side_effect=stub_notify) as mock_notify:
|
||||
self._live_migrate(server, 'completed')
|
||||
server = self.api.get_server(server['id'])
|
||||
self.assertEqual('src', server['OS-EXT-SRV-ATTR:host'])
|
||||
self.assertEqual('dest', server['OS-EXT-SRV-ATTR:host'])
|
||||
# We don't care about call arguments here, we just want to be sure
|
||||
# our stub actually got called.
|
||||
mock_notify.assert_called()
|
||||
|
@ -6519,12 +6519,15 @@ class ComputeTestCase(BaseTestCase,
|
||||
'clear_events_for_instance'),
|
||||
mock.patch.object(self.compute, 'update_available_resource'),
|
||||
mock.patch.object(migration_obj, 'save'),
|
||||
mock.patch.object(instance, 'get_network_info'),
|
||||
) as (
|
||||
post_live_migration,
|
||||
migrate_instance_start, post_live_migration_at_destination,
|
||||
post_live_migration_at_source, setup_networks_on_host,
|
||||
clear_events, update_available_resource, mig_save
|
||||
clear_events, update_available_resource, mig_save, get_nw_info,
|
||||
):
|
||||
nw_info = network_model.NetworkInfo.hydrate([])
|
||||
get_nw_info.return_value = nw_info
|
||||
self.compute._post_live_migration(c, instance, dest,
|
||||
migrate_data=migrate_data,
|
||||
source_bdms=bdms)
|
||||
@ -6546,7 +6549,7 @@ class ComputeTestCase(BaseTestCase,
|
||||
post_live_migration_at_destination.assert_has_calls([
|
||||
mock.call(c, instance, False, dest)])
|
||||
post_live_migration_at_source.assert_has_calls(
|
||||
[mock.call(c, instance, [])])
|
||||
[mock.call(c, instance, nw_info)])
|
||||
clear_events.assert_called_once_with(instance)
|
||||
update_available_resource.assert_has_calls([mock.call(c)])
|
||||
self.assertEqual('completed', migration_obj.status)
|
||||
|
Loading…
Reference in New Issue
Block a user