From ed3e27afb4ca7b36baab3b8008a727caa43e1e3b Mon Sep 17 00:00:00 2001
From: Mohammed Naser
Date: Wed, 14 Mar 2018 20:03:47 +0000
Subject: [PATCH] Revert "Refine waiting for vif plug events during
 _hard_reboot"

This reverts commit aaf37a26d6caa124f0cc6c3fe21bfdf58ccb8517.

This gets us back to Ib0cf5d55750f13d0499a570f14024dca551ed4d4 which was
meant to address an issue introduced by
Id188d48609f3d22d14e16c7f6114291d547a8986.

So we essentially had three changes:

1. Hard reboot would blow away volumes and vifs and then wait for the
   vifs to be plugged; this caused a problem for some vif types
   (linuxbridge was reported) because the event never came and we timed
   out.

2. To work around that, a second change was made to simply not wait for
   vif plugging events.

3. Since #2 was a bit heavy-handed for a problem that didn't impact
   openvswitch, another change was made to only wait for non-bridge vif
   types, so we'd wait for OVS.

But it turns out that opendaylight is an OVS vif type and doesn't send
events for plugging the vif, only for binding the port (and we don't
re-bind the port during reboot). There is also a report of this being a
problem for other types of ports, see
If209f77cff2de00f694b01b2507c633ec3882c82.

So rather than try to special-case every possible vif type that could be
impacted by this, we are simply reverting the change so we no longer
wait for vif plugged events during hard reboot.

Note that if we went back to Id188d48609f3d22d14e16c7f6114291d547a8986
and tweaked that to not unplug/plug the vifs we wouldn't have this
problem either, and that change was really meant to deal with an
encrypted volume issue on reboot. But changing that logic is out of the
scope of this change. Alternatively, we could re-bind the port during
reboot but that could have other implications, or neutron could put
something into the port details telling us which vifs will send events
and which won't, but again that's all outside the scope of this patch.

Change-Id: Ib3f10706a7191c58909ec1938042ce338df4d499
Closes-Bug: #1755890
---
 nova/tests/unit/virt/libvirt/test_driver.py | 37 +------------
 nova/virt/libvirt/driver.py                 | 61 ++++++---------------
 2 files changed, 19 insertions(+), 79 deletions(-)

diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index 77cf1995528e..1ec8f446fa1b 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -12661,28 +12661,6 @@ class LibvirtConnTestCase(test.NoDBTestCase,
         mock_hard_reboot.assert_called_once_with(self.context, instance, [],
                                                  None)
 
-    @mock.patch('nova.virt.libvirt.LibvirtDriver._get_neutron_events')
-    @mock.patch('nova.virt.libvirt.LibvirtDriver.plug_vifs')
-    @mock.patch('nova.virt.libvirt.LibvirtDriver._lxc_disk_handler')
-    @mock.patch('nova.virt.libvirt.LibvirtDriver._create_domain')
-    def test_create_domain_and_network_reboot(self, mock_create, mock_handler,
-                                              mock_plug, mock_events):
-        # Verify that we call get_neutron_events with reboot=True if
-        # create_domain_and_network was called with reboot=True
-        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
-        instance = objects.Instance(**self.test_instance)
-        network_info = _fake_network_info(self, 1)
-
-        @mock.patch.object(drvr.firewall_driver, 'setup_basic_filtering')
-        @mock.patch.object(drvr.firewall_driver, 'prepare_instance_filter')
-        @mock.patch.object(drvr.firewall_driver, 'apply_instance_filter')
-        def _do_create(mock_apply, mock_prepare, mock_setup):
-            drvr._create_domain_and_network(self.context, mock.sentinel.xml,
-                                            instance, network_info,
-                                            reboot=True)
-        _do_create()
-        mock_events.assert_called_once_with(network_info, reboot=True)
-
     @mock.patch('nova.virt.libvirt.LibvirtDriver.get_info')
     @mock.patch('nova.virt.libvirt.LibvirtDriver._create_domain_and_network')
     @mock.patch('nova.virt.libvirt.LibvirtDriver._get_guest_xml')
@@ -12752,7 +12730,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
             block_device_info=block_device_info, mdevs=[uuids.mdev1])
         mock_create_domain_and_network.assert_called_once_with(self.context,
             dummyxml, instance, network_info,
-            block_device_info=block_device_info, reboot=True)
+            block_device_info=block_device_info, vifs_already_plugged=True)
 
     @mock.patch('oslo_utils.fileutils.ensure_tree')
     @mock.patch('oslo_service.loopingcall.FixedIntervalLoopingCall')
@@ -15655,19 +15633,6 @@ class LibvirtConnTestCase(test.NoDBTestCase,
         events = drvr._get_neutron_events(network_info)
         self.assertEqual([('network-vif-plugged', '1')], events)
 
-    def test_get_neutron_events_reboot(self):
-        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
-        bridge = network_model.VIF_TYPE_BRIDGE
-        ovs = network_model.VIF_TYPE_OVS
-        network_info = [network_model.VIF(id='1'),
-                        network_model.VIF(id='2', active=True),
-                        network_model.VIF(id='3', type=bridge),
-                        network_model.VIF(id='4', type=ovs)]
-        events = drvr._get_neutron_events(network_info, reboot=True)
-        self.assertEqual([('network-vif-plugged', '1'),
-                          ('network-vif-plugged', '2'),
-                          ('network-vif-plugged', '4')], events)
-
     def test_unplug_vifs_ignores_errors(self):
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI())
         with mock.patch.object(drvr, 'vif_driver') as vif_driver:
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index e8b4bf0aaf24..8463c248536c 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -2708,9 +2708,14 @@ class LibvirtDriver(driver.ComputeDriver):
 
         # Initialize all the necessary networking, block devices and
         # start the instance.
-        self._create_domain_and_network(
-            context, xml, instance, network_info,
-            block_device_info=block_device_info, reboot=True)
+        # NOTE(melwitt): Pass vifs_already_plugged=True here even though we've
+        # unplugged vifs earlier. The behavior of neutron plug events depends
+        # on which vif type we're using and we are working with a stale network
+        # info cache here, so won't rely on waiting for neutron plug events.
+        # vifs_already_plugged=True means "do not wait for neutron plug events"
+        self._create_domain_and_network(context, xml, instance, network_info,
+                                        block_device_info=block_device_info,
+                                        vifs_already_plugged=True)
 
         self._prepare_pci_devices_for_use(
             pci_manager.get_instance_pci_devs(instance, 'all'))
@@ -5380,25 +5385,14 @@ class LibvirtDriver(driver.ComputeDriver):
         if CONF.vif_plugging_is_fatal:
             raise exception.VirtualInterfaceCreateException()
 
-    def _get_neutron_events(self, network_info, reboot=False):
-        def eventable(vif):
-            if reboot:
-                # NOTE(melwitt): We won't expect events for the bridge vif
-                # type during a reboot because the neutron agent might not
-                # detect that we have unplugged and plugged vifs with os-vif.
-                # We also disregard the 'active' status of the vif during a
-                # reboot because the stale network_info we get from the compute
-                # manager won't show active=False for the vifs we've unplugged.
-                return vif.get('type') != network_model.VIF_TYPE_BRIDGE
-            # NOTE(danms): We need to collect any VIFs that are currently
-            # down that we expect a down->up event for. Anything that is
-            # already up will not undergo that transition, and for
-            # anything that might be stale (cache-wise) assume it's
-            # already up so we don't block on it.
-            return vif.get('active', True) is False
-
+    def _get_neutron_events(self, network_info):
+        # NOTE(danms): We need to collect any VIFs that are currently
+        # down that we expect a down->up event for. Anything that is
+        # already up will not undergo that transition, and for
+        # anything that might be stale (cache-wise) assume it's
+        # already up so we don't block on it.
         return [('network-vif-plugged', vif['id'])
-                for vif in network_info if eventable(vif)]
+                for vif in network_info if vif.get('active', True) is False]
 
     def _cleanup_failed_start(self, context, instance, network_info,
                               block_device_info, guest, destroy_disks):
@@ -5414,33 +5408,14 @@ class LibvirtDriver(driver.ComputeDriver):
                                    block_device_info=None, power_on=True,
                                    vifs_already_plugged=False,
                                    post_xml_callback=None,
-                                   destroy_disks_on_failure=False,
-                                   reboot=False):
+                                   destroy_disks_on_failure=False):
-        """Do required network setup and create domain.
-
-        :param context: nova.context.RequestContext for volume API calls
-        :param xml: Guest domain XML
-        :param instance: nova.objects.Instance object
-        :param network_info: nova.network.model.NetworkInfo for the instance
-        :param block_device_info: Legacy block device info dict
-        :param power_on: Whether to power on the guest after creating the XML
-        :param vifs_already_plugged: False means "wait for neutron plug events"
-                                     if using neutron, qemu/kvm, power_on=True,
-                                     and CONF.vif_plugging_timeout configured
-        :param post_xml_callback: Optional callback to call after creating the
-                                  guest domain XML
-        :param destroy_disks_on_failure: Whether to destroy the disks if we
-                                         fail during guest domain creation
-        :param reboot: Whether or not this is being called during a reboot. If
-                       we are rebooting, we will need to handle waiting for
-                       neutron plug events differently
-        """
+        """Do required network setup and create domain."""
         timeout = CONF.vif_plugging_timeout
         if (self._conn_supports_start_paused and
                 utils.is_neutron() and not
                 vifs_already_plugged and power_on and timeout):
-            events = self._get_neutron_events(network_info, reboot=reboot)
+            events = self._get_neutron_events(network_info)
         else:
            events = []
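
A note for reviewers: the behavior this revert restores can be sketched
standalone as below. This is an illustration only, not driver code; the
dict-based VIFs are simplified stand-ins for nova.network.model.VIF
objects, and get_neutron_events mirrors the restored
_get_neutron_events filter. Only VIFs reported as down (active=False)
yield a network-vif-plugged event to wait for, and because _hard_reboot
now passes vifs_already_plugged=True, the reboot path skips waiting
entirely regardless of vif type.

# Standalone sketch (simplified, dict-based VIFs instead of
# nova.network.model.VIF) of the event filter restored by this revert.

def get_neutron_events(network_info):
    """Return (event_name, vif_id) pairs worth waiting for.

    Only VIFs explicitly reported as down (active=False) are expected to
    transition to up; anything already up, or stale/unknown, is assumed
    up so we never block on it.
    """
    return [('network-vif-plugged', vif['id'])
            for vif in network_info
            if vif.get('active', True) is False]


vifs = [{'id': '1', 'active': False},  # down: wait for its plug event
        {'id': '2', 'active': True},   # up: no event expected
        {'id': '3'}]                   # unknown/stale: assume up
assert get_neutron_events(vifs) == [('network-vif-plugged', '1')]

# During _hard_reboot the driver now always passes
# vifs_already_plugged=True, which bypasses even this filter, so no
# plug events are waited for on reboot.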