Merge "Add a WA flag waiting for vif-plugged event during reboot"
commit 69b0d31d20
@@ -245,6 +245,12 @@
               # reduce the number of placement calls in steady state. Added in
               # Stein.
               resource_provider_association_refresh: 0
+            workarounds:
+              # This workaround is an improvement on hard reboot that cannot
+              # be turned on unconditionally. But we know that ml2/ovs sends
+              # plug time events, so we can enable it in this ovs job for
+              # vnic_type normal.
+              wait_for_vif_plugged_event_during_hard_reboot: normal
           $NOVA_CONF:
             quota:
               # Added in Train.
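
For reference, outside of this CI job the same knob is ordinary oslo.config. A minimal standalone sketch of reading it from a nova.conf-style file (the [workarounds] group and option name come from this patch; the truncated choices list, the temp file, and everything else are illustrative):

    import tempfile

    from oslo_config import cfg

    # Register an option shaped like the one added by this patch
    # (only a subset of the vnic type choices is shown for brevity).
    opts = [cfg.ListOpt(
        'wait_for_vif_plugged_event_during_hard_reboot',
        item_type=cfg.types.String(choices=['normal', 'direct']),
        default=[])]
    conf = cfg.ConfigOpts()
    conf.register_opts(opts, group='workarounds')

    with tempfile.NamedTemporaryFile('w', suffix='.conf') as f:
        f.write('[workarounds]\n'
                'wait_for_vif_plugged_event_during_hard_reboot = normal\n')
        f.flush()
        conf(args=[], default_config_files=[f.name])

    print(conf.workarounds.wait_for_vif_plugged_event_during_hard_reboot)
    # -> ['normal']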
@@ -299,6 +299,65 @@
 cases the correct fix is to update the guest image kernel to one that is
 patched; however, in some cases this is not possible. This workaround allows
 the emulation of an APIC to be disabled per host, but it is not recommended
 for use outside of a CI or developer cloud.
 """),
+    cfg.ListOpt('wait_for_vif_plugged_event_during_hard_reboot',
+        item_type=cfg.types.String(
+            choices=[
+                "normal",
+                "direct",
+                "macvtap",
+                "baremetal",
+                "direct-physical",
+                "virtio-forwarder",
+                "smart-nic",
+                "vdpa",
+                "accelerator-direct",
+                "accelerator-direct-physical",
+            ]),
+        default=[],
+        help="""
+The libvirt virt driver implements power on and hard reboot by tearing down
+every vif of the instance being rebooted and then plugging them again. By
+default nova does not wait for the network-vif-plugged event from neutron
+before it lets the instance run. This can cause the instance to request its
+IP via DHCP before the neutron backend has a chance to set up the networking
+backend after the vif plug.
+
+This flag defines which vifs nova expects network-vif-plugged events from
+during hard reboot. The possible values are neutron port vnic types:
+
+* normal
+* direct
+* macvtap
+* baremetal
+* direct-physical
+* virtio-forwarder
+* smart-nic
+* vdpa
+* accelerator-direct
+* accelerator-direct-physical
+
+Adding a ``vnic_type`` to this configuration makes Nova wait for a
+network-vif-plugged event for each of the instance's vifs having the specific
+``vnic_type`` before unpausing the instance, similarly to how new instance
+creation works.
+
+Please note that not all neutron networking backends send plug time events
+for certain ``vnic_type``; therefore this config is empty by default.
+
+The ml2/ovs and the networking-odl backends are known to send plug time
+events for ports with ``normal`` ``vnic_type``, so it is safe to add
+``normal`` to this config if you are using only those backends on the compute
+host.
+
+The neutron in-tree SRIOV backend does not reliably send the
+network-vif-plugged event during plug time for ports with ``direct``
+vnic_type and never sends that event for ports with ``direct-physical``
+vnic_type during plug time. For other ``vnic_type`` and backend pairs, please
+consult the developers of the backend.
+
+Related options:
+
+* :oslo.config:option:`DEFAULT.vif_plugging_timeout`
+"""),
 ]
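
A quick way to see what item_type=cfg.types.String(choices=...) buys in the option above: oslo.config validates every list item against the enumerated vnic types, so a typo is rejected instead of silently ignored. A hedged sketch (option and group names match the patch; the shortened choices list is illustrative):

    from oslo_config import cfg

    opt = cfg.ListOpt(
        'wait_for_vif_plugged_event_during_hard_reboot',
        item_type=cfg.types.String(choices=['normal', 'direct']),
        default=[])
    conf = cfg.ConfigOpts()
    conf.register_opts([opt], group='workarounds')

    conf.set_override(
        'wait_for_vif_plugged_event_during_hard_reboot',
        ['normal', 'direct'], group='workarounds')  # accepted

    try:
        conf.set_override(
            'wait_for_vif_plugged_event_during_hard_reboot',
            ['bogus'], group='workarounds')  # not one of the choices
    except ValueError as exc:
        # oslo.config validates overrides against the declared type.
        print(exc)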
@@ -16221,7 +16221,48 @@ class LibvirtConnTestCase(test.NoDBTestCase,
             accel_info=accel_info)
         mock_create_guest_with_network.assert_called_once_with(self.context,
             dummyxml, instance, network_info, block_device_info,
-            vifs_already_plugged=True)
+            vifs_already_plugged=True, external_events=[])
+
+    @mock.patch('oslo_utils.fileutils.ensure_tree', new=mock.Mock())
+    @mock.patch('nova.virt.libvirt.LibvirtDriver.get_info')
+    @mock.patch('nova.virt.libvirt.LibvirtDriver._create_guest_with_network')
+    @mock.patch('nova.virt.libvirt.LibvirtDriver._get_guest_xml')
+    @mock.patch('nova.virt.libvirt.LibvirtDriver.destroy', new=mock.Mock())
+    @mock.patch(
+        'nova.virt.libvirt.LibvirtDriver._get_all_assigned_mediated_devices',
+        new=mock.Mock(return_value={}))
+    def test_hard_reboot_wait_for_plug(
+        self, mock_get_guest_xml, mock_create_guest_with_network, mock_get_info
+    ):
+        self.flags(
+            group="workarounds",
+            wait_for_vif_plugged_event_during_hard_reboot=["normal"])
+        self.context.auth_token = None
+        instance = objects.Instance(**self.test_instance)
+        network_info = _fake_network_info(self, num_networks=4)
+        network_info[0]["vnic_type"] = "normal"
+        network_info[1]["vnic_type"] = "direct"
+        network_info[2]["vnic_type"] = "normal"
+        network_info[3]["vnic_type"] = "direct-physical"
+        block_device_info = None
+        return_values = [hardware.InstanceInfo(state=power_state.SHUTDOWN),
+                         hardware.InstanceInfo(state=power_state.RUNNING)]
+        mock_get_info.side_effect = return_values
+        mock_get_guest_xml.return_value = mock.sentinel.xml
+
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+        drvr._hard_reboot(
+            self.context, instance, network_info, block_device_info)
+
+        mock_create_guest_with_network.assert_called_once_with(
+            self.context, mock.sentinel.xml, instance, network_info,
+            block_device_info,
+            vifs_already_plugged=False,
+            external_events=[
+                ('network-vif-plugged', uuids.vif1),
+                ('network-vif-plugged', uuids.vif3),
+            ]
+        )
 
     @mock.patch('oslo_utils.fileutils.ensure_tree')
     @mock.patch('oslo_service.loopingcall.FixedIntervalLoopingCall')
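
One detail of the new test worth calling out: stacked mock.patch decorators hand their mocks to the test bottom-up, and patches created with new=... inject no argument at all, which is why the signature is (mock_get_guest_xml, mock_create_guest_with_network, mock_get_info). A tiny self-contained illustration (the class and names are made up):

    from unittest import mock

    class Driver:
        def get_xml(self): ...
        def get_info(self): ...
        def destroy(self): ...

    @mock.patch.object(Driver, 'get_info')                  # outermost: last argument
    @mock.patch.object(Driver, 'destroy', new=mock.Mock())  # new=...: no argument
    @mock.patch.object(Driver, 'get_xml')                   # innermost: first argument
    def demo(mock_get_xml, mock_get_info):
        # Two distinct auto-created mocks, injected in bottom-up order.
        print(mock_get_xml is not mock_get_info)  # True

    demo()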
@@ -3818,11 +3818,32 @@ class LibvirtDriver(driver.ComputeDriver):
         # on which vif type we're using and we are working with a stale network
         # info cache here, so won't rely on waiting for neutron plug events.
         # vifs_already_plugged=True means "do not wait for neutron plug events"
+        external_events = []
+        vifs_already_plugged = True
+        event_expected_for_vnic_types = (
+            CONF.workarounds.wait_for_vif_plugged_event_during_hard_reboot)
+        if event_expected_for_vnic_types:
+            # NOTE(gibi): We unplugged every vif during destroy above and we
+            # will replug them with _create_guest_with_network. As the
+            # workaround config has some vnic_types configured we expect
+            # vif-plugged events for every vif with those vnic_types.
+            # TODO(gibi): only wait for events if we know that the networking
+            # backend sends plug time events. For that we need to finish
+            # https://bugs.launchpad.net/neutron/+bug/1821058 first in Neutron
+            # then create a driver -> plug-time event mapping in nova.
+            external_events = [
+                ('network-vif-plugged', vif['id'])
+                for vif in network_info
+                if vif['vnic_type'] in event_expected_for_vnic_types
+            ]
+            vifs_already_plugged = False
+
         # NOTE(efried): The instance should already have a vtpm_secret_uuid
         # registered if appropriate.
         self._create_guest_with_network(
             context, xml, instance, network_info, block_device_info,
-            vifs_already_plugged=True)
+            vifs_already_plugged=vifs_already_plugged,
+            external_events=external_events)
 
         def _wait_for_reboot():
             """Called at an interval until the VM is running again."""
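
The heart of the change above is a filter over the (stale) network info cache. Pulled out as a standalone sketch, with the function name and the dict-shaped vifs being illustrative while the ('network-vif-plugged', vif id) tuple format follows the patch:

    def select_plug_events(network_info, wanted_vnic_types):
        # Return the event tuples to wait for and whether the vifs
        # should be treated as already plugged.
        if not wanted_vnic_types:
            # Workaround disabled: keep the old "do not wait" behaviour.
            return [], True
        events = [
            ('network-vif-plugged', vif['id'])
            for vif in network_info
            if vif['vnic_type'] in wanted_vnic_types
        ]
        return events, False

    # Mirrors the unit test: only the two "normal" vifs yield events.
    network_info = [
        {'id': 'vif1', 'vnic_type': 'normal'},
        {'id': 'vif2', 'vnic_type': 'direct'},
        {'id': 'vif3', 'vnic_type': 'normal'},
        {'id': 'vif4', 'vnic_type': 'direct-physical'},
    ]
    print(select_plug_events(network_info, ['normal']))
    # -> ([('network-vif-plugged', 'vif1'), ('network-vif-plugged', 'vif3')], False)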
@@ -7216,7 +7237,7 @@ class LibvirtDriver(driver.ComputeDriver):
         power_on: bool = True,
         vifs_already_plugged: bool = False,
         post_xml_callback: ty.Callable = None,
-        external_events: ty.Optional[ty.List[str]] = None,
+        external_events: ty.Optional[ty.List[ty.Tuple[str, str]]] = None,
         cleanup_instance_dir: bool = False,
         cleanup_instance_disks: bool = False,
     ) -> libvirt_guest.Guest:
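
The annotation fix above matters because each external event is a (name, tag) pair rather than a bare string, matching what the driver now builds during hard reboot. A minimal illustration of the corrected type (the function is made up for the example):

    import typing as ty

    ExternalEvent = ty.Tuple[str, str]  # (event name, vif id)

    def describe(events: ty.Optional[ty.List[ExternalEvent]] = None) -> None:
        for name, tag in events or []:
            print(f'waiting for {name} on vif {tag}')

    describe([('network-vif-plugged', 'vif1')])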
@@ -0,0 +1,18 @@
+---
+issues:
+  - |
+    The libvirt virt driver in Nova implements power on and hard reboot by
+    destroying the domain first and unplugging the vifs, then recreating the
+    domain and replugging the vifs. However nova does not wait for the
+    network-vif-plugged event before unpausing the domain. This can cause
+    the domain to start running and request an IP via DHCP before the
+    networking backend has finished plugging the vifs. The config option
+    [workarounds]wait_for_vif_plugged_event_during_hard_reboot has been added,
+    defaulting to an empty list, that can be used to ensure that the libvirt
+    driver waits for the network-vif-plugged event for vifs with specific
+    ``vnic_type`` before it unpauses the domain during hard reboot. This
+    should only be used if the deployment uses a networking backend that
+    sends such events for the given ``vnic_type`` at vif plug time. The
+    ml2/ovs and the networking-odl Neutron backends are known to send plug
+    time events for ports with ``normal`` ``vnic_type``. For more information
+    see https://bugs.launchpad.net/nova/+bug/1946729
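
In practice, an operator on a deployment that only uses ml2/ovs or networking-odl would opt in by setting, in each compute host's nova.conf (illustrative snippet; the group, option, and value come from this change):

    [workarounds]
    wait_for_vif_plugged_event_during_hard_reboot = normal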