Browse Source

Consolidate device detach error handling

After I7f2b6330decb92e2838aa7cee47fb228f00f47da the error handling of
the callers of _detach_with_retry() can be consolidated. Libvirt related
errors now handled in _detach_sync() and various DeviceNotFound cases
now don't need special handling on the caller side.

Change-Id: Ic6eb66bc2396949aceecda50bda334a1bc7dab31
(cherry picked from commit 7317cfbc67)
changes/26/788726/2
Balazs Gibizer 3 months ago
parent
commit
8b50f48ed2
2 changed files with 63 additions and 122 deletions
  1. +56
    -47
      nova/tests/unit/virt/libvirt/test_driver.py
  2. +7
    -75
      nova/virt/libvirt/driver.py

+ 56
- 47
nova/tests/unit/virt/libvirt/test_driver.py View File

@ -18589,6 +18589,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
lambda self, instance: FakeVirtDomain())
instance = objects.Instance(**self.test_instance)
instance.info_cache = None
network_info = _fake_network_info(self)
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
@ -22957,7 +22958,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
# This will trigger _detach_with_retry to raise
# DeviceNotFound
get_interface_calls = [
expected_cfg, # detach_interface() itself gets the config
expected_cfg, # _detach_with_retry: persistent config
None, # _detach_with_retry: no device in live config
None, # _detach_with_retry: persistent config gone as detached
@ -22965,7 +22965,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
else:
if state in (power_state.RUNNING, power_state.PAUSED):
get_interface_calls = [
expected_cfg, # detach_interface() itself gets the config
expected_cfg, # _detach_with_retry: persistent config
expected_cfg, # _detach_with_retry: live config
None, # _detach_with_retry: persistent config gone
@ -22973,7 +22972,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
]
else:
get_interface_calls = [
expected_cfg, # detach_interface() itself gets the config
expected_cfg, # _detach_with_retry: persistent config
None, # _detach_with_retry: persistent config gone
]
@ -23008,7 +23006,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
flags=fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG)
mock_get_interface.assert_has_calls(
[
mock.call(expected_cfg),
mock.call(expected_cfg, from_persistent_config=True),
mock.call(expected_cfg),
mock.call(expected_cfg, from_persistent_config=True),
@ -23026,7 +23023,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
])
mock_get_interface.assert_has_calls(
[
mock.call(expected_cfg),
mock.call(expected_cfg, from_persistent_config=True),
mock.call(expected_cfg),
mock.call(expected_cfg, from_persistent_config=True),
@ -23038,7 +23034,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
flags=fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG)
mock_get_interface.assert_has_calls(
[
mock.call(expected_cfg),
mock.call(expected_cfg, from_persistent_config=True),
mock.call(expected_cfg, from_persistent_config=True),
])
@ -23064,6 +23059,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
# Asserts that we don't log an error when the interface device is not
# found on the guest after a libvirt error during detach.
instance = self._create_instance()
instance.info_cache = None
vif = _fake_network_info(self)[0]
guest = mock.Mock(spec=libvirt_guest.Guest)
guest.get_power_state = mock.Mock()
@ -23081,36 +23077,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
self.assertIn('the device is no longer found on the guest',
str(mock_log.warning.call_args[0]))
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
'_detach_with_retry')
@mock.patch('nova.virt.libvirt.driver.LOG')
def test_detach_interface_guest_not_found_after_detach(
self, mock_log, mock_detach_with_retry
):
# Asserts that we don't raise an exception when the guest is gone
# after a libvirt error during detach.
instance = self._create_instance()
vif = _fake_network_info(self, 1)[0]
guest = mock.MagicMock()
guest.get_power_state.return_value = power_state.RUNNING
guest.get_interface_by_cfg.return_value = (
vconfig.LibvirtConfigGuestInterface())
get_guest_mock = mock.Mock()
# Host.get_guest should be called twice: the first time it is found,
# the second time it is gone.
get_guest_mock.side_effect = (
guest, exception.InstanceNotFound(instance_id=instance.uuid))
self.drvr._host.get_guest = get_guest_mock
error = fakelibvirt.libvirtError(
'internal error: End of file from qemu monitor')
error.err = (fakelibvirt.VIR_ERR_OPERATION_FAILED,)
mock_detach_with_retry.side_effect = error
self.drvr.detach_interface(self.context, instance, vif)
self.assertEqual(1, mock_log.info.call_count)
self.assertIn('Instance disappeared while detaching interface',
mock_log.info.call_args[0][0])
get_guest_mock.assert_has_calls([mock.call(instance)] * 2)
@mock.patch('threading.Event.wait', new=mock.Mock())
@mock.patch.object(FakeVirtDomain, 'info')
@mock.patch.object(FakeVirtDomain, 'detachDeviceFlags')
@ -23154,7 +23120,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
mock.patch.object(
libvirt_guest.Guest, 'get_interface_by_cfg',
side_effect=[
expected, # detach_interface gets the config
expected, # _detach_with_retry: persistent config
expected, # _detach_with_retry: live config
None, # _detach_with_retry: persistent gone
@ -23168,14 +23133,12 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
mock_get_interface.assert_has_calls(
[
mock.call(expected, ),
mock.call(expected, from_persistent_config=True),
mock.call(expected),
mock.call(expected, from_persistent_config=True),
mock.call(expected),
]
)
self.assertEqual(5, mock_get_interface.call_count)
mock_get_config.assert_called_once_with(
instance, network_info[0], test.MatchType(objects.ImageMeta),
test.MatchType(objects.Flavor), CONF.libvirt.virt_type)
@ -23197,7 +23160,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
instance = self._create_instance()
network_info = _fake_network_info(self, num_networks=3)
vif = network_info[0]
interface = vconfig.LibvirtConfigGuestInterface()
image_meta = objects.ImageMeta.from_dict({})
disk_info = blockinfo.get_disk_info(
CONF.libvirt.virt_type, instance, image_meta)
@ -23210,8 +23172,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
self.drvr._host, 'get_guest', return_value=guest),
mock.patch.object(
self.drvr.vif_driver, 'get_config', return_value=cfg),
mock.patch.object(
guest, 'get_interface_by_cfg', return_value=interface),
mock.patch.object(
instance, 'get_network_info', return_value=network_info),
mock.patch.object(
@ -23220,16 +23180,15 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
self.drvr, '_get_guest_config_meta', return_value=config_meta),
mock.patch.object(guest, 'set_metadata')
) as (
mock_get_guest, mock_get_config, mock_get_interface_by_cfg,
mock_get_network_info, mock_detach_with_retry,
mock_get_guest_config_meta, mock_set_metadata
mock_get_guest, mock_get_config, mock_get_network_info,
mock_detach_with_retry, mock_get_guest_config_meta,
mock_set_metadata
):
self.drvr.detach_interface(self.context, instance, vif)
mock_get_guest.assert_called_once_with(instance)
mock_get_config.assert_called_once_with(
instance, vif, test.MatchType(objects.ImageMeta),
test.MatchType(objects.Flavor), CONF.libvirt.virt_type)
mock_get_interface_by_cfg.assert_called_once_with(cfg)
mock_detach_with_retry.assert_called_once_with(
guest, instance.uuid, mock.ANY, device_name=None)
mock_get_network_info.assert_called_once_with()
@ -23791,6 +23750,56 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
mock_guest.detach_device.assert_called_once_with(
mock_dev, persistent=True, live=False)
@mock.patch.object(libvirt_driver.LOG, 'warning')
def test__detach_with_retry_libvirt_reports_domain_not_found(
self, mock_warning
):
"""Test that libvirt reports that the domain is not found and assert
that it is only logged.
"""
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
mock_guest = mock.Mock(spec=libvirt_guest.Guest)
mock_guest.get_power_state.return_value = power_state.SHUTDOWN
mock_dev = mock.Mock(spec=vconfig.LibvirtConfigGuestDisk)
mock_dev.alias = 'virtio-disk1'
mock_get_device_conf_func = mock.Mock(
# The first call is to get the device from the persistent domain
# before the detach. The second calls to double check that the
# device is gone after libvirt returned. This second call is
# pointless in the current case as libvirt returned that the domain
# does not exists. So the code could know that there is no device.
# Still for simplicity the call is made and expected to return None
side_effect=[
mock_dev,
None,
]
)
mock_guest.detach_device.side_effect = fakelibvirt.make_libvirtError(
fakelibvirt.libvirtError,
msg='error',
error_code=fakelibvirt.VIR_ERR_NO_DOMAIN)
drvr._detach_with_retry(
mock_guest,
uuids.instance_uuid,
mock_get_device_conf_func,
device_name='vdb',
)
mock_guest.has_persistent_configuration.assert_called_once_with()
mock_get_device_conf_func.assert_has_calls([
mock.call(from_persistent_config=True),
mock.call(from_persistent_config=True),
])
mock_guest.detach_device.assert_called_once_with(
mock_dev, persistent=True, live=False)
mock_warning.assert_called_once_with(
'During device detach, instance disappeared.',
instance_uuid=uuids.instance_uuid)
@ddt.data(power_state.RUNNING, power_state.PAUSED)
def test__detach_with_retry_other_sync_libvirt_error(self, state):
"""Test that libvirt reports non device related error during detach
@ -23809,7 +23818,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
mock_guest.detach_device.side_effect = fakelibvirt.make_libvirtError(
fakelibvirt.libvirtError,
msg='error',
error_code=fakelibvirt.VIR_ERR_NO_DOMAIN)
error_code=fakelibvirt.VIR_ERR_INTERNAL_ERROR)
self.assertRaises(
fakelibvirt.libvirtError,


+ 7
- 75
nova/virt/libvirt/driver.py View File

@ -2526,6 +2526,13 @@ class LibvirtDriver(driver.ComputeDriver):
'still being in progress.', device_name, instance_uuid)
return
if code == libvirt.VIR_ERR_NO_DOMAIN:
LOG.warning(
"During device detach, instance disappeared.",
instance_uuid=instance_uuid)
# if the domain has disappeared then we have nothing to detach
return
LOG.warning(
'Unexpected libvirt error while detaching device %s from '
'instance %s: %s', device_name, instance_uuid, str(ex))
@ -2560,17 +2567,6 @@ class LibvirtDriver(driver.ComputeDriver):
# call.
LOG.info("Device %s not found in instance.",
disk_dev, instance=instance)
except libvirt.libvirtError as ex:
# NOTE(vish): This is called to cleanup volumes after live
# migration, so we should still disconnect even if
# the instance doesn't exist here anymore.
error_code = ex.get_error_code()
if error_code == libvirt.VIR_ERR_NO_DOMAIN:
# NOTE(vish):
LOG.warning("During detach_volume, instance disappeared.",
instance=instance)
else:
raise
self._disconnect_volume(context, connection_info, instance,
encryption=encryption)
@ -2737,21 +2733,7 @@ class LibvirtDriver(driver.ComputeDriver):
instance.image_meta,
instance.flavor,
CONF.libvirt.virt_type)
interface = guest.get_interface_by_cfg(cfg)
try:
# NOTE(mriedem): When deleting an instance and using Neutron,
# we can be racing against Neutron deleting the port and
# sending the vif-deleted event which then triggers a call to
# detach the interface, so if the interface is not found then
# we can just log it as a warning.
if not interface:
mac = vif.get('address')
# The interface is gone so just log it as a warning.
LOG.warning('Detaching interface %(mac)s failed because '
'the device is no longer found on the guest.',
{'mac': mac}, instance=instance)
return
get_dev = functools.partial(guest.get_interface_by_cfg, cfg)
self._detach_with_retry(
guest,
@ -2759,61 +2741,11 @@ class LibvirtDriver(driver.ComputeDriver):
get_dev,
device_name=self.vif_driver.get_vif_devname(vif),
)
except exception.DeviceDetachFailed:
# We failed to detach the device even with the retry loop, so let's
# dump some debug information to the logs before raising back up.
with excutils.save_and_reraise_exception():
devname = self.vif_driver.get_vif_devname(vif)
interface = guest.get_interface_by_cfg(cfg)
if interface:
LOG.warning(
'Failed to detach interface %(devname)s after '
'repeated attempts. Final interface xml:\n'
'%(interface_xml)s\nFinal guest xml:\n%(guest_xml)s',
{'devname': devname,
'interface_xml': interface.to_xml(),
'guest_xml': guest.get_xml_desc()},
instance=instance)
except exception.DeviceNotFound:
# The interface is gone so just log it as a warning.
LOG.warning('Detaching interface %(mac)s failed because '
'the device is no longer found on the guest.',
{'mac': vif.get('address')}, instance=instance)
except libvirt.libvirtError as ex:
error_code = ex.get_error_code()
if error_code == libvirt.VIR_ERR_NO_DOMAIN:
LOG.warning("During detach_interface, instance disappeared.",
instance=instance)
else:
# NOTE(mriedem): When deleting an instance and using Neutron,
# we can be racing against Neutron deleting the port and
# sending the vif-deleted event which then triggers a call to
# detach the interface, so we might have failed because the
# network device no longer exists. Libvirt will fail with
# "operation failed: no matching network device was found"
# which unfortunately does not have a unique error code so we
# need to look up the interface by config and if it's not found
# then we can just log it as a warning rather than tracing an
# error.
mac = vif.get('address')
# Get a fresh instance of the guest in case it is gone.
try:
guest = self._host.get_guest(instance)
except exception.InstanceNotFound:
LOG.info("Instance disappeared while detaching interface "
"%s", vif['id'], instance=instance)
return
interface = guest.get_interface_by_cfg(cfg)
if interface:
LOG.error('detaching network adapter failed.',
instance=instance, exc_info=True)
raise exception.InterfaceDetachFailed(
instance_uuid=instance.uuid)
# The interface is gone so just log it as a warning.
LOG.warning('Detaching interface %(mac)s failed because '
'the device is no longer found on the guest.',
{'mac': mac}, instance=instance)
finally:
# NOTE(gibi): we need to unplug the vif _after_ the detach is done
# on the libvirt side as otherwise libvirt will still manage the


Loading…
Cancel
Save