Revert "Refine waiting for vif plug events during _hard_reboot"

This reverts commit 5a10047f9d.

This gets us back to Ib0cf5d55750f13d0499a570f14024dca551ed4d4
which was meant to address an issue introduced
by Id188d48609f3d22d14e16c7f6114291d547a8986.

So we essentially had three changes:

1. Hard reboot would blow away volumes and vifs and then wait for the
   vifs to be plugged; this caused a problem for some vif types (
   linuxbridge was reported) because the event never came and we
   timed out.

2. To workaround that, a second change was made to simply not wait for
   vif plugging events.

3. Since #2 was a bit heavy-handed for a problem that didn't impact
   openvswitch, another change was made to only wait for non-bridge vif
   types, so we'd wait for OVS.

But it turns out that opendaylight is an OVS vif type and doesn't send
events for plugging the vif, only for binding the port (and we don't
re-bind the port during reboot). There is also a report of this being a
problem for other types of ports, see
If209f77cff2de00f694b01b2507c633ec3882c82.

So rather than try to special-case every possible vif type that could
be impacted by this, we are simply reverting the change so we no longer
wait for vif plugged events during hard reboot.

Note that if we went back to Id188d48609f3d22d14e16c7f6114291d547a8986
and tweaked that to not unplug/plug the vifs we wouldn't have this
problem either, and that change was really meant to deal with an
encrypted volume issue on reboot. But changing that logic is out of the
scope of this change. Alternatively, we could re-bind the port during
reboot but that could have other implications, or neutron could put
something into the port details telling us which vifs will send events
and which won't, but again that's all outside of the scope of this
patch.

Change-Id: Ib3f10706a7191c58909ec1938042ce338df4d499
Closes-Bug: #1755890
changes/17/553817/3
Mohammed Naser 5 years ago committed by Matt Riedemann
parent f41c24f21a
commit 2e03eae67d
  1. 37
      nova/tests/unit/virt/libvirt/test_driver.py
  2. 63
      nova/virt/libvirt/driver.py

@ -12775,28 +12775,6 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock_hard_reboot.assert_called_once_with(self.context,
instance, [], None)
@mock.patch('nova.virt.libvirt.LibvirtDriver._get_neutron_events')
@mock.patch('nova.virt.libvirt.LibvirtDriver.plug_vifs')
@mock.patch('nova.virt.libvirt.LibvirtDriver._lxc_disk_handler')
@mock.patch('nova.virt.libvirt.LibvirtDriver._create_domain')
def test_create_domain_and_network_reboot(self, mock_create, mock_handler,
mock_plug, mock_events):
# Verify that we call get_neutron_events with reboot=True if
# create_domain_and_network was called with reboot=True
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
instance = objects.Instance(**self.test_instance)
network_info = _fake_network_info(self, 1)
@mock.patch.object(drvr.firewall_driver, 'setup_basic_filtering')
@mock.patch.object(drvr.firewall_driver, 'prepare_instance_filter')
@mock.patch.object(drvr.firewall_driver, 'apply_instance_filter')
def _do_create(mock_apply, mock_prepare, mock_setup):
drvr._create_domain_and_network(self.context, mock.sentinel.xml,
instance, network_info,
reboot=True)
_do_create()
mock_events.assert_called_once_with(network_info, reboot=True)
@mock.patch('nova.virt.libvirt.LibvirtDriver.get_info')
@mock.patch('nova.virt.libvirt.LibvirtDriver._create_domain_and_network')
@mock.patch('nova.virt.libvirt.LibvirtDriver._get_guest_xml')
@ -12866,7 +12844,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
block_device_info=block_device_info, mdevs=[uuids.mdev1])
mock_create_domain_and_network.assert_called_once_with(self.context,
dummyxml, instance, network_info,
block_device_info=block_device_info, reboot=True)
block_device_info=block_device_info, vifs_already_plugged=True)
@mock.patch('oslo_utils.fileutils.ensure_tree')
@mock.patch('oslo_service.loopingcall.FixedIntervalLoopingCall')
@ -15770,19 +15748,6 @@ class LibvirtConnTestCase(test.NoDBTestCase,
events = drvr._get_neutron_events(network_info)
self.assertEqual([('network-vif-plugged', '1')], events)
def test_get_neutron_events_reboot(self):
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
bridge = network_model.VIF_TYPE_BRIDGE
ovs = network_model.VIF_TYPE_OVS
network_info = [network_model.VIF(id='1'),
network_model.VIF(id='2', active=True),
network_model.VIF(id='3', type=bridge),
network_model.VIF(id='4', type=ovs)]
events = drvr._get_neutron_events(network_info, reboot=True)
self.assertEqual([('network-vif-plugged', '1'),
('network-vif-plugged', '2'),
('network-vif-plugged', '4')], events)
def test_unplug_vifs_ignores_errors(self):
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI())
with mock.patch.object(drvr, 'vif_driver') as vif_driver:

@ -2750,9 +2750,14 @@ class LibvirtDriver(driver.ComputeDriver):
# Initialize all the necessary networking, block devices and
# start the instance.
self._create_domain_and_network(
context, xml, instance, network_info,
block_device_info=block_device_info, reboot=True)
# NOTE(melwitt): Pass vifs_already_plugged=True here even though we've
# unplugged vifs earlier. The behavior of neutron plug events depends
# on which vif type we're using and we are working with a stale network
# info cache here, so won't rely on waiting for neutron plug events.
# vifs_already_plugged=True means "do not wait for neutron plug events"
self._create_domain_and_network(context, xml, instance, network_info,
block_device_info=block_device_info,
vifs_already_plugged=True)
self._prepare_pci_devices_for_use(
pci_manager.get_instance_pci_devs(instance, 'all'))
@ -5422,25 +5427,14 @@ class LibvirtDriver(driver.ComputeDriver):
if CONF.vif_plugging_is_fatal:
raise exception.VirtualInterfaceCreateException()
def _get_neutron_events(self, network_info, reboot=False):
def eventable(vif):
if reboot:
# NOTE(melwitt): We won't expect events for the bridge vif
# type during a reboot because the neutron agent might not
# detect that we have unplugged and plugged vifs with os-vif.
# We also disregard the 'active' status of the vif during a
# reboot because the stale network_info we get from the compute
# manager won't show active=False for the vifs we've unplugged.
return vif.get('type') != network_model.VIF_TYPE_BRIDGE
# NOTE(danms): We need to collect any VIFs that are currently
# down that we expect a down->up event for. Anything that is
# already up will not undergo that transition, and for
# anything that might be stale (cache-wise) assume it's
# already up so we don't block on it.
return vif.get('active', True) is False
def _get_neutron_events(self, network_info):
# NOTE(danms): We need to collect any VIFs that are currently
# down that we expect a down->up event for. Anything that is
# already up will not undergo that transition, and for
# anything that might be stale (cache-wise) assume it's
# already up so we don't block on it.
return [('network-vif-plugged', vif['id'])
for vif in network_info if eventable(vif)]
for vif in network_info if vif.get('active', True) is False]
def _cleanup_failed_start(self, context, instance, network_info,
block_device_info, guest, destroy_disks):
@ -5456,33 +5450,14 @@ class LibvirtDriver(driver.ComputeDriver):
block_device_info=None, power_on=True,
vifs_already_plugged=False,
post_xml_callback=None,
destroy_disks_on_failure=False,
reboot=False):
"""Do required network setup and create domain.
:param context: nova.context.RequestContext for volume API calls
:param xml: Guest domain XML
:param instance: nova.objects.Instance object
:param network_info: nova.network.model.NetworkInfo for the instance
:param block_device_info: Legacy block device info dict
:param power_on: Whether to power on the guest after creating the XML
:param vifs_already_plugged: False means "wait for neutron plug events"
if using neutron, qemu/kvm, power_on=True,
and CONF.vif_plugging_timeout configured
:param post_xml_callback: Optional callback to call after creating the
guest domain XML
:param destroy_disks_on_failure: Whether to destroy the disks if we
fail during guest domain creation
:param reboot: Whether or not this is being called during a reboot. If
we are rebooting, we will need to handle waiting for
neutron plug events differently
"""
destroy_disks_on_failure=False):
"""Do required network setup and create domain."""
timeout = CONF.vif_plugging_timeout
if (self._conn_supports_start_paused and
utils.is_neutron() and not
vifs_already_plugged and power_on and timeout):
events = self._get_neutron_events(network_info, reboot=reboot)
events = self._get_neutron_events(network_info)
else:
events = []

Loading…
Cancel
Save