Merge "Add a WA flag waiting for vif-plugged event during reboot"
commit 69b0d31d20
@@ -245,6 +245,12 @@
           # reduce the number of placement calls in steady state. Added in
           # Stein.
           resource_provider_association_refresh: 0
+        workarounds:
+          # This workaround is an improvement on hard reboot that cannot be
+          # turned on unconditionally. But we know that ml2/ovs sends plug
+          # time events, so we can enable it in this ovs job for vnic_type
+          # normal.
+          wait_for_vif_plugged_event_during_hard_reboot: normal
       $NOVA_CONF:
         quota:
           # Added in Train.
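Outside the gate, the same workaround can be switched on in a devstack
deployment via local.conf. A minimal illustrative sketch (assuming standard
devstack post-config syntax; the snippet is not part of this commit):

    [[post-config|$NOVA_CONF]]
    [workarounds]
    wait_for_vif_plugged_event_during_hard_reboot = normal
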
@@ -299,6 +299,65 @@ cases the correct fix is to update the guest image kernel to one that is
 patched, however in some cases this is not possible. This workaround allows
 the emulation of an apic to be disabled per host, however it is not
 recommended to use outside of a CI or developer cloud.
 """),
+    cfg.ListOpt('wait_for_vif_plugged_event_during_hard_reboot',
+        item_type=cfg.types.String(
+            choices=[
+                "normal",
+                "direct",
+                "macvtap",
+                "baremetal",
+                "direct-physical",
+                "virtio-forwarder",
+                "smart-nic",
+                "vdpa",
+                "accelerator-direct",
+                "accelerator-direct-physical",
+            ]),
+        default=[],
+        help="""
+The libvirt virt driver implements power on and hard reboot by tearing down
+every vif of the instance being rebooted, then plugging them again. By default
+nova does not wait for the network-vif-plugged event from neutron before it
+lets the instance run. This can cause the instance to request its IP via DHCP
+before the neutron backend has had a chance to set up the networking backend
+after the vif plug.
+
+This flag defines which vifs nova expects network-vif-plugged events from
+during hard reboot. The possible values are neutron port vnic types:
+
+* normal
+* direct
+* macvtap
+* baremetal
+* direct-physical
+* virtio-forwarder
+* smart-nic
+* vdpa
+* accelerator-direct
+* accelerator-direct-physical
+
+Adding a ``vnic_type`` to this configuration makes Nova wait for a
+network-vif-plugged event for each of the instance's vifs having the specific
+``vnic_type`` before unpausing the instance, similarly to how new instance
+creation works.
+
+Please note that not all neutron networking backends send plug time events for
+certain ``vnic_type``, therefore this config is empty by default.
+
+The ml2/ovs and the networking-odl backends are known to send plug time events
+for ports with ``normal`` ``vnic_type``, so it is safe to add ``normal`` to
+this config if you are using only those backends in the compute host.
+
+The neutron in-tree SRIOV backend does not reliably send the
+network-vif-plugged event at plug time for ports with ``direct`` vnic_type and
+never sends that event for ports with ``direct-physical`` vnic_type at plug
+time. For other ``vnic_type`` and backend pairs, please consult the developers
+of the backend.
+
+Related options:
+
+* :oslo.config:option:`DEFAULT.vif_plugging_timeout`
+"""),
 ]
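For deployers, the new option is read from the ``[workarounds]`` section of
nova.conf on each compute host; since it is a cfg.ListOpt, multiple vnic types
can be given comma separated. A minimal illustrative example for an
ml2/ovs-only host (the value shown is an assumption about the deployment, not
taken from the commit):

    [workarounds]
    # ml2/ovs is known to send plug time events for "normal" ports; add
    # further vnic_types only if the backend sends events for them too.
    wait_for_vif_plugged_event_during_hard_reboot = normal
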
@@ -16221,7 +16221,48 @@ class LibvirtConnTestCase(test.NoDBTestCase,
             accel_info=accel_info)
         mock_create_guest_with_network.assert_called_once_with(self.context,
             dummyxml, instance, network_info, block_device_info,
-            vifs_already_plugged=True)
+            vifs_already_plugged=True, external_events=[])
+
+    @mock.patch('oslo_utils.fileutils.ensure_tree', new=mock.Mock())
+    @mock.patch('nova.virt.libvirt.LibvirtDriver.get_info')
+    @mock.patch('nova.virt.libvirt.LibvirtDriver._create_guest_with_network')
+    @mock.patch('nova.virt.libvirt.LibvirtDriver._get_guest_xml')
+    @mock.patch('nova.virt.libvirt.LibvirtDriver.destroy', new=mock.Mock())
+    @mock.patch(
+        'nova.virt.libvirt.LibvirtDriver._get_all_assigned_mediated_devices',
+        new=mock.Mock(return_value={}))
+    def test_hard_reboot_wait_for_plug(
+        self, mock_get_guest_xml, mock_create_guest_with_network, mock_get_info
+    ):
+        self.flags(
+            group="workarounds",
+            wait_for_vif_plugged_event_during_hard_reboot=["normal"])
+        self.context.auth_token = None
+        instance = objects.Instance(**self.test_instance)
+        network_info = _fake_network_info(self, num_networks=4)
+        network_info[0]["vnic_type"] = "normal"
+        network_info[1]["vnic_type"] = "direct"
+        network_info[2]["vnic_type"] = "normal"
+        network_info[3]["vnic_type"] = "direct-physical"
+        block_device_info = None
+        return_values = [hardware.InstanceInfo(state=power_state.SHUTDOWN),
+                         hardware.InstanceInfo(state=power_state.RUNNING)]
+        mock_get_info.side_effect = return_values
+        mock_get_guest_xml.return_value = mock.sentinel.xml
+
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+        drvr._hard_reboot(
+            self.context, instance, network_info, block_device_info)
+
+        mock_create_guest_with_network.assert_called_once_with(
+            self.context, mock.sentinel.xml, instance, network_info,
+            block_device_info,
+            vifs_already_plugged=False,
+            external_events=[
+                ('network-vif-plugged', uuids.vif1),
+                ('network-vif-plugged', uuids.vif3),
+            ]
+        )
+
     @mock.patch('oslo_utils.fileutils.ensure_tree')
     @mock.patch('oslo_service.loopingcall.FixedIntervalLoopingCall')
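To run just this test locally, a filter can be passed through to stestr via
tox (assuming nova's usual tox setup; the exact env name varies by branch):

    tox -e py38 -- nova.tests.unit.virt.libvirt.test_driver.LibvirtConnTestCase.test_hard_reboot_wait_for_plug
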
@@ -3818,11 +3818,32 @@ class LibvirtDriver(driver.ComputeDriver):
         # on which vif type we're using and we are working with a stale network
         # info cache here, so won't rely on waiting for neutron plug events.
         # vifs_already_plugged=True means "do not wait for neutron plug events"
+        external_events = []
+        vifs_already_plugged = True
+        event_expected_for_vnic_types = (
+            CONF.workarounds.wait_for_vif_plugged_event_during_hard_reboot)
+        if event_expected_for_vnic_types:
+            # NOTE(gibi): We unplugged every vif during destroy above and we
+            # will replug them with _create_guest_with_network. As the
+            # workaround config has some vnic_types configured we expect
+            # vif-plugged events for every vif with those vnic_types.
+            # TODO(gibi): only wait for events if we know that the networking
+            # backend sends plug time events. For that we need to finish
+            # https://bugs.launchpad.net/neutron/+bug/1821058 first in Neutron
+            # then create a driver -> plug-time event mapping in nova.
+            external_events = [
+                ('network-vif-plugged', vif['id'])
+                for vif in network_info
+                if vif['vnic_type'] in event_expected_for_vnic_types
+            ]
+            vifs_already_plugged = False
+
         # NOTE(efried): The instance should already have a vtpm_secret_uuid
         # registered if appropriate.
         self._create_guest_with_network(
             context, xml, instance, network_info, block_device_info,
-            vifs_already_plugged=True)
+            vifs_already_plugged=vifs_already_plugged,
+            external_events=external_events)

         def _wait_for_reboot():
             """Called at an interval until the VM is running again."""
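The new branch is self-contained enough to sketch in isolation. A minimal
standalone Python sketch of the derivation (illustrative helper name and data,
not nova source):

    from typing import Dict, List, Tuple

    def events_to_wait_for(
        network_info: List[Dict],
        expected_vnic_types: List[str],
    ) -> Tuple[List[Tuple[str, str]], bool]:
        """Return (external_events, vifs_already_plugged) for a hard reboot."""
        if not expected_vnic_types:
            # Workaround disabled: do not wait for any neutron plug events.
            return [], True
        external_events = [
            ('network-vif-plugged', vif['id'])
            for vif in network_info
            if vif['vnic_type'] in expected_vnic_types
        ]
        return external_events, False

    # Mirrors the unit test above: only the two "normal" vifs produce events.
    vifs = [
        {'id': 'vif1', 'vnic_type': 'normal'},
        {'id': 'vif2', 'vnic_type': 'direct'},
        {'id': 'vif3', 'vnic_type': 'normal'},
        {'id': 'vif4', 'vnic_type': 'direct-physical'},
    ]
    assert events_to_wait_for(vifs, ['normal']) == (
        [('network-vif-plugged', 'vif1'), ('network-vif-plugged', 'vif3')],
        False,
    )
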
@@ -7216,7 +7237,7 @@ class LibvirtDriver(driver.ComputeDriver):
         power_on: bool = True,
         vifs_already_plugged: bool = False,
         post_xml_callback: ty.Callable = None,
-        external_events: ty.Optional[ty.List[str]] = None,
+        external_events: ty.Optional[ty.List[ty.Tuple[str, str]]] = None,
         cleanup_instance_dir: bool = False,
         cleanup_instance_disks: bool = False,
     ) -> libvirt_guest.Guest:
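The corrected annotation matches what the hard reboot path now builds: each
external event is an (event name, tag) pair rather than a bare string. A
hypothetical caller-side value (the UUID is made up for illustration):

    import typing as ty

    external_events: ty.Optional[ty.List[ty.Tuple[str, str]]] = [
        ('network-vif-plugged', 'b6c7a416-0e46-4a89-9c5e-0b8e55bfab9e'),
    ]
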
releasenotes/notes/bug-1946729-wait-for-vif-plugged-event-during-hard-reboot-fb491f6a68370bab.yaml (new file, 18 lines)
@@ -0,0 +1,18 @@
+---
+issues:
+  - |
+    The libvirt virt driver in Nova implements power on and hard reboot by
+    destroying the domain first and unplugging the vifs, then recreating the
+    domain and replugging the vifs. However nova does not wait for the
+    network-vif-plugged event before unpausing the domain. This can cause
+    the domain to start running and requesting its IP via DHCP before the
+    networking backend has finished plugging the vifs. The config option
+    [workarounds]wait_for_vif_plugged_event_during_hard_reboot has been added,
+    defaulting to an empty list, that can be used to ensure that the libvirt
+    driver waits for the network-vif-plugged event for vifs with a specific
+    ``vnic_type`` before it unpauses the domain during hard reboot. This
+    should only be used if the deployment uses a networking backend that
+    sends such an event for the given ``vnic_type`` at vif plug time. The
+    ml2/ovs and the networking-odl Neutron backends are known to send plug
+    time events for ports with ``normal`` ``vnic_type``. For more information
+    see https://bugs.launchpad.net/nova/+bug/1946729