libvirt: Ignore device already in the process of unplug errors

At present QEMU will raise an error to libvirt when a device_del request
is made for a device that has already partially detached through a
previous request. This is outlined in more detail in the following
downstream Red Hat QEMU bug report:

Get libvirtError "Device XX is already in the process of unplug" [..]
https://bugzilla.redhat.com/show_bug.cgi?id=1878659

Within Nova we can safely ignore this error and let our existing retry
logic try again after a short wait, hopefully giving the original
request time to finish removing the device from the domain.

This change does so, and as a result one of the subsequent device_del
requests should raise a VIR_ERR_DEVICE_MISSING error from libvirt.
_try_detach_device should then translate that libvirt error into a
DeviceNotFound exception which is itself then ignored by all
detach_device_with_retry callers and taken to mean that the device has
detached successfully.

Closes-Bug: #1923206
Change-Id: I0e068043d8267ab91535413d950a3e154c2234f7
(cherry picked from commit 0a7d3794c6)
This commit is contained in:
Lee Yarwood 2021-04-09 15:37:23 +01:00
parent 68af588d5c
commit 972a86d61f
2 changed files with 59 additions and 2 deletions

View File

@ -377,6 +377,48 @@ class GuestTestCase(test.NoDBTestCase):
error_message="device not found: disk vdb not found",
supports_device_missing=True)
def test_detach_device_with_already_in_process_of_unplug_error(self):
    # Regression coverage for
    # https://bugzilla.redhat.com/show_bug.cgi?id=1878659
    # QEMU reports VIR_ERR_INTERNAL_ERROR when device_del is requested
    # while an earlier request is about to complete. The retry callable
    # is expected to end up raising DeviceNotFound.
    def _domain_error(code, message):
        # Build a fake libvirtError originating from VIR_FROM_DOMAIN.
        return fakelibvirt.make_libvirtError(
            fakelibvirt.libvirtError, "",
            error_message=message,
            error_code=code,
            error_domain=fakelibvirt.VIR_FROM_DOMAIN
        )

    unplug_in_progress_error = _domain_error(
        fakelibvirt.VIR_ERR_INTERNAL_ERROR,
        'device vdb is already in the process of unplug')
    device_gone_error = _domain_error(
        fakelibvirt.VIR_ERR_DEVICE_MISSING,
        'device not found: disk vdb not found')

    device_conf = mock.Mock(spec=vconfig.LibvirtConfigGuestDevice)
    device_conf.to_xml.return_value = "</xml>"
    get_device_conf = mock.Mock(return_value=device_conf)

    self.domain.isPersistent.return_value = True
    # Successive detachDeviceFlags calls: the first is accepted, the
    # second is rejected with the "already in the process of unplug"
    # internal error, and the third reports the device as missing,
    # mocking the first request having detached it asynchronously.
    self.domain.detachDeviceFlags.side_effect = [
        None,
        unplug_in_progress_error,
        device_gone_error,
    ]

    retry_callable = self.guest.detach_device_with_retry(
        get_device_conf, 'vdb', live=True, inc_sleep_time=.01)

    # The retry callable must surface exception.DeviceNotFound.
    self.assertRaises(exception.DeviceNotFound, retry_callable)
def test_get_xml_desc(self):
self.guest.get_xml_desc()
self.domain.XMLDesc.assert_called_once_with(flags=0)

View File

@ -434,12 +434,27 @@ class Guest(object):
LOG.debug('Successfully detached device %s from guest. '
'Persistent? %s. Live? %s',
device, persistent, live)
except libvirt.libvirtError as ex:
with excutils.save_and_reraise_exception(reraise=False) as ctx:
if ex.get_error_code() == libvirt.VIR_ERR_DEVICE_MISSING:
code = ex.get_error_code()
msg = ex.get_error_message()
if code == libvirt.VIR_ERR_DEVICE_MISSING:
raise exception.DeviceNotFound(
device=alternative_device_name)
# NOTE(lyarwood): https://bugzilla.redhat.com/1878659
# Ignore this known QEMU bug for the time being allowing
# our retry logic to fire again and hopefully see that
# the device has been removed asynchronously by QEMU
# in the meantime when the next call to detach raises
# VIR_ERR_DEVICE_MISSING.
if (code == libvirt.VIR_ERR_INTERNAL_ERROR and
msg and 'already in the process of unplug' in msg
):
LOG.debug('Ignoring QEMU rejecting our request to '
'detach as it is caused by a previous '
'request still being in progress.')
return
# Re-raise the original exception if we're not raising
# DeviceNotFound instead. This will avoid logging of a
# "Original exception being dropped" traceback.