diff --git a/nova/conf/libvirt.py b/nova/conf/libvirt.py
index 80984c5a970b..5d89dbe7396f 100644
--- a/nova/conf/libvirt.py
+++ b/nova/conf/libvirt.py
@@ -875,6 +875,30 @@ Related options:
* It's recommended to consider including ``x86_64=q35`` in
:oslo.config:option:`libvirt.hw_machine_type`; see
:ref:`deploying-sev-capable-infrastructure` for more on this.
+"""),
+ cfg.IntOpt('device_detach_attempts',
+ default=8,
+ min=1,
+ help="""
+Maximum number of attempts the driver tries to detach a device in libvirt.
+
+Related options:
+
+* :oslo.config:option:`libvirt.device_detach_timeout`
+
+"""),
+ cfg.IntOpt('device_detach_timeout',
+ default=20,
+ min=1,
+ help="""
+Maximum number of seconds the driver waits for the success or the failure
+event from libvirt for a given device detach attempt before it re-triggers the
+detach.
+
+Related options:
+
+* :oslo.config:option:`libvirt.device_detach_attempts`
+
"""),
]
diff --git a/nova/tests/unit/virt/libvirt/fakelibvirt.py b/nova/tests/unit/virt/libvirt/fakelibvirt.py
index 115b490984d1..5164192f3db1 100644
--- a/nova/tests/unit/virt/libvirt/fakelibvirt.py
+++ b/nova/tests/unit/virt/libvirt/fakelibvirt.py
@@ -1278,9 +1278,17 @@ class Domain(object):
self.attachDevice(xml)
def detachDevice(self, xml):
+ # TODO(gibi): this should handle nics similarly to attachDevice()
disk_info = _parse_disk_info(etree.fromstring(xml))
- disk_info['_attached'] = True
- return disk_info in self._def['devices']['disks']
+ attached_disk_info = None
+ for attached_disk in self._def['devices']['disks']:
+ if attached_disk['target_dev'] == disk_info.get('target_dev'):
+ attached_disk_info = attached_disk
+ break
+
+ if attached_disk_info:
+ self._def['devices']['disks'].remove(attached_disk_info)
+ return attached_disk_info is not None
def detachDeviceFlags(self, xml, flags):
self.detachDevice(xml)
diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index faa166f903e4..90eb7305b1c8 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -9299,6 +9299,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock_build_metadata.assert_called_with(self.context, instance)
mock_save.assert_called_with()
+ @mock.patch('threading.Event', new=mock.Mock())
@mock.patch('nova.virt.libvirt.host.Host._get_domain')
def test_detach_volume_with_vir_domain_affect_live_flag(self,
mock_get_domain):
@@ -9319,8 +9320,12 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock_dom = mock.MagicMock()
# Second time don't return anything about disk vdc so it looks removed
- return_list = [mock_xml_with_disk, mock_xml_without_disk,
- mock_xml_without_disk]
+ return_list = [
+ mock_xml_with_disk, # persistent domain
+ mock_xml_with_disk, # live domain
+ mock_xml_without_disk, # persistent gone
+ mock_xml_without_disk # live gone
+ ]
# Doubling the size of return list because we test with two guest power
# states
mock_dom.XMLDesc.side_effect = return_list + return_list
@@ -9328,25 +9333,36 @@ class LibvirtConnTestCase(test.NoDBTestCase,
connection_info = {"driver_volume_type": "fake",
"data": {"device_path": "/fake",
"access_mode": "rw"}}
- flags = (fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG |
- fakelibvirt.VIR_DOMAIN_AFFECT_LIVE)
with mock.patch.object(drvr, '_disconnect_volume') as \
mock_disconnect_volume:
for state in (power_state.RUNNING, power_state.PAUSED):
+ mock_dom.reset_mock()
mock_dom.info.return_value = [state, 512, 512, 2, 1234, 5678]
mock_get_domain.return_value = mock_dom
drvr.detach_volume(
self.context, connection_info, instance, '/dev/vdc')
mock_get_domain.assert_called_with(instance)
- call = mock_dom.detachDeviceFlags.mock_calls[0]
xml = """
- """
+ """
+ # we expect two separate detach calls
+ self.assertEqual(2, mock_dom.detachDeviceFlags.call_count)
+ # one for the persistent domain
+ call = mock_dom.detachDeviceFlags.mock_calls[0]
self.assertXmlEqual(xml, call.args[0])
- self.assertEqual({"flags": flags}, call.kwargs)
+ self.assertEqual(
+ {"flags": fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG},
+ call.kwargs)
+ # and one for the live domain
+ call = mock_dom.detachDeviceFlags.mock_calls[1]
+ self.assertXmlEqual(xml, call.args[0])
+ self.assertEqual(
+ {"flags": fakelibvirt.VIR_DOMAIN_AFFECT_LIVE},
+ call.kwargs)
+
mock_disconnect_volume.assert_called_with(
self.context, connection_info, instance, encryption=None)
@@ -9406,11 +9422,14 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock_disconnect_volume.assert_called_once_with(
self.context, connection_info, instance, encryption=encryption)
+ @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
+ '_detach_with_retry')
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_volume_driver')
@mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_volume_encryptor')
@mock.patch('nova.virt.libvirt.host.Host.get_guest')
def test_detach_volume_order_with_encryptors(self, mock_get_guest,
- mock_get_encryptor, mock_get_volume_driver):
+ mock_get_encryptor, mock_get_volume_driver, mock_detach_with_retry
+ ):
mock_volume_driver = mock.MagicMock(
spec=volume_drivers.LibvirtBaseVolumeDriver)
@@ -9425,7 +9444,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock_order = mock.Mock()
mock_order.attach_mock(mock_volume_driver.disconnect_volume,
'disconnect_volume')
- mock_order.attach_mock(mock_guest.detach_device_with_retry(),
+ mock_order.attach_mock(mock_detach_with_retry,
'detach_volume')
mock_order.attach_mock(mock_encryptor.detach_volume,
'detach_encryptor')
@@ -9441,9 +9460,20 @@ class LibvirtConnTestCase(test.NoDBTestCase,
encryption=encryption)
mock_order.assert_has_calls([
- mock.call.detach_volume(),
+ mock.call.detach_volume(
+ mock_guest,
+ instance.uuid,
+ # it is functools.partial(mock_guest.get_disk, 'vdc')
+ # see assert below
+ mock.ANY,
+ device_name='vdc',
+ live=True
+ ),
mock.call.detach_encryptor(**encryption),
mock.call.disconnect_volume(connection_info, instance)])
+ get_device_conf_func = mock_detach_with_retry.mock_calls[0][1][2]
+ self.assertEqual(mock_guest.get_disk, get_device_conf_func.func)
+ self.assertEqual(('vdc',), get_device_conf_func.args)
def test_extend_volume(self):
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
@@ -15589,12 +15619,16 @@ class LibvirtConnTestCase(test.NoDBTestCase,
self.assertFalse(service_mock.disabled)
self.assertIsNone(service_mock.disabled_reason)
+ @mock.patch.object(libvirt_driver.AsyncDeviceEventsHandler,
+ 'cleanup_waiters')
@mock.patch.object(libvirt_driver.LibvirtDriver, 'delete_instance_files')
@mock.patch.object(host.Host, '_get_domain',
side_effect=exception.InstanceNotFound(
instance_id=uuids.instance))
@mock.patch.object(objects.Instance, 'save')
- def test_immediate_delete(self, mock_save, mock_get, mock_delete):
+ def test_immediate_delete(self, mock_save, mock_get, mock_delete,
+ mock_cleanup_async_detach
+ ):
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
instance = objects.Instance(self.context, **self.test_instance)
@@ -15603,6 +15637,7 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock_get.assert_has_calls([mock.call(instance)] * 3)
self.assertEqual(3, mock_get.call_count)
mock_delete.assert_called_once_with(instance)
+ mock_cleanup_async_detach.assert_called_once_with(instance.uuid)
@mock.patch.object(objects.Instance, 'get_by_uuid')
@mock.patch.object(objects.Instance, 'obj_load_attr', autospec=True)
@@ -22944,8 +22979,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
self._test_attach_interface(
power_state.SHUTDOWN, fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG)
- def _test_detach_interface(self, power_state, expected_flags,
- device_not_found=False):
+ @mock.patch('threading.Event.wait', new=mock.Mock)
+ def _test_detach_interface(self, state, device_not_found=False):
# setup some mocks
instance = self._create_instance()
network_info = _fake_network_info(self)
@@ -22962,7 +22997,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
""",
- info=[power_state, 1, 2, 3, 4])
+ info=[state, 1, 2, 3, 4])
guest = libvirt_guest.Guest(domain)
expected_cfg = vconfig.LibvirtConfigGuestInterface()
@@ -22975,11 +23010,28 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
""")
if device_not_found:
- # This will trigger detach_device_with_retry to raise
+ # This will trigger _detach_with_retry to raise
# DeviceNotFound
- get_interface_calls = [expected_cfg, None]
+ get_interface_calls = [
+ expected_cfg, # detach_interface() itself gets the config
+ expected_cfg, # _detach_with_retry: persistent config
+ None, # _detach_with_retry: no device in live config
+ ]
else:
- get_interface_calls = [expected_cfg, expected_cfg, None, None]
+ if state in (power_state.RUNNING, power_state.PAUSED):
+ get_interface_calls = [
+ expected_cfg, # detach_interface() itself gets the config
+ expected_cfg, # _detach_with_retry: persistent config
+ expected_cfg, # _detach_with_retry: live config
+ None, # _detach_with_retry: persistent config gone
+ None # _detach_with_retry: live config gone
+ ]
+ else:
+ get_interface_calls = [
+ expected_cfg, # detach_interface() itself gets the config
+ expected_cfg, # _detach_with_retry: persistent config
+ None, # _detach_with_retry: persistent config gone
+ ]
with test.nested(
mock.patch.object(host.Host, 'get_guest', return_value=guest),
@@ -23006,45 +23058,60 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
mock_get_config.assert_called_once_with(
instance, network_info[0], test.MatchType(objects.ImageMeta),
test.MatchType(objects.Flavor), CONF.libvirt.virt_type)
- mock_get_interface.assert_has_calls(
- [mock.call(expected_cfg) for x in range(len(get_interface_calls))])
-
if device_not_found:
mock_detach_device_flags.assert_not_called()
self.assertTrue(mock_warning.called)
+ mock_get_interface.assert_has_calls(
+ [
+ mock.call(expected_cfg),
+ mock.call(expected_cfg, from_persistent_config=True),
+ mock.call(expected_cfg),
+ ])
else:
- mock_detach_device_flags.assert_called_once_with(
- expected_cfg.to_xml(), flags=expected_flags)
- mock_warning.assert_not_called()
mock_get_network_info.assert_called_once_with()
+ if state in (power_state.RUNNING, power_state.PAUSED):
+ mock_detach_device_flags.assert_has_calls([
+ mock.call(
+ expected_cfg.to_xml(),
+ flags=fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG),
+ mock.call(
+ expected_cfg.to_xml(),
+ flags=fakelibvirt.VIR_DOMAIN_AFFECT_LIVE),
+ ])
+ mock_get_interface.assert_has_calls(
+ [
+ mock.call(expected_cfg),
+ mock.call(expected_cfg, from_persistent_config=True),
+ mock.call(expected_cfg),
+ mock.call(expected_cfg, from_persistent_config=True),
+ mock.call(expected_cfg),
+ ])
+ else:
+ mock_detach_device_flags.assert_called_once_with(
+ expected_cfg.to_xml(),
+ flags=fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG)
+ mock_get_interface.assert_has_calls(
+ [
+ mock.call(expected_cfg),
+ mock.call(expected_cfg, from_persistent_config=True),
+ mock.call(expected_cfg, from_persistent_config=True),
+ ])
mock_unplug.assert_called_once_with(instance, network_info[0])
def test_detach_interface_with_running_instance(self):
- self._test_detach_interface(
- power_state.RUNNING,
- expected_flags=(fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG |
- fakelibvirt.VIR_DOMAIN_AFFECT_LIVE))
+ self._test_detach_interface(power_state.RUNNING)
def test_detach_interface_with_running_instance_device_not_found(self):
"""Tests that the interface is detached before we try to detach it.
"""
- self._test_detach_interface(
- power_state.RUNNING,
- expected_flags=(fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG |
- fakelibvirt.VIR_DOMAIN_AFFECT_LIVE),
- device_not_found=True)
+ self._test_detach_interface(power_state.RUNNING, device_not_found=True)
def test_detach_interface_with_pause_instance(self):
- self._test_detach_interface(
- power_state.PAUSED,
- expected_flags=(fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG |
- fakelibvirt.VIR_DOMAIN_AFFECT_LIVE))
+ self._test_detach_interface(power_state.PAUSED)
def test_detach_interface_with_shutdown_instance(self):
- self._test_detach_interface(
- power_state.SHUTDOWN,
- expected_flags=(fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG))
+ self._test_detach_interface(power_state.SHUTDOWN)
@mock.patch('nova.virt.libvirt.driver.LOG')
def test_detach_interface_device_not_found(self, mock_log):
@@ -23068,8 +23135,12 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
self.assertIn('the device is no longer found on the guest',
str(mock_log.warning.call_args[0]))
+ @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
+ '_detach_with_retry')
@mock.patch('nova.virt.libvirt.driver.LOG')
- def test_detach_interface_guest_not_found_after_detach(self, mock_log):
+ def test_detach_interface_guest_not_found_after_detach(
+ self, mock_log, mock_detach_with_retry
+ ):
# Asserts that we don't raise an exception when the guest is gone
# after a libvirt error during detach.
instance = self._create_instance()
@@ -23087,13 +23158,14 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
error = fakelibvirt.libvirtError(
'internal error: End of file from qemu monitor')
error.err = (fakelibvirt.VIR_ERR_OPERATION_FAILED,)
- guest.detach_device_with_retry.side_effect = error
+ mock_detach_with_retry.side_effect = error
self.drvr.detach_interface(self.context, instance, vif)
self.assertEqual(1, mock_log.info.call_count)
self.assertIn('Instance disappeared while detaching interface',
mock_log.info.call_args[0][0])
get_guest_mock.assert_has_calls([mock.call(instance)] * 2)
+ @mock.patch('threading.Event.wait', new=mock.Mock())
@mock.patch.object(FakeVirtDomain, 'info')
@mock.patch.object(FakeVirtDomain, 'detachDeviceFlags')
@mock.patch.object(host.Host, '_get_domain')
@@ -23132,26 +23204,46 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
""")
- expected_flags = (fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG |
- fakelibvirt.VIR_DOMAIN_AFFECT_LIVE)
with test.nested(
- mock.patch.object(libvirt_guest.Guest, 'get_interface_by_cfg',
- side_effect=[expected, expected, None, None]),
+ mock.patch.object(
+ libvirt_guest.Guest, 'get_interface_by_cfg',
+ side_effect=[
+ expected, # detach_interface gets the config
+ expected, # _detach_with_retry: persistent config
+ expected, # _detach_with_retry: live config
+ None, # _detach_with_retry: persistent gone
+ None # _detach_with_retry: live gone
+ ]),
mock.patch.object(self.drvr.vif_driver, 'get_config',
return_value=expected),
mock.patch.object(instance, 'get_network_info')
) as (mock_get_interface, mock_get_config, mock_get_network_info):
self.drvr.detach_interface(self.context, instance, network_info[0])
- mock_get_interface.assert_has_calls([mock.call(expected)] * 3)
- self.assertEqual(4, mock_get_interface.call_count)
+ mock_get_interface.assert_has_calls(
+ [
+ mock.call(expected, ),
+ mock.call(expected, from_persistent_config=True),
+ mock.call(expected),
+ mock.call(expected, from_persistent_config=True),
+ mock.call(expected),
+ ]
+ )
+ self.assertEqual(5, mock_get_interface.call_count)
mock_get_config.assert_called_once_with(
instance, network_info[0], test.MatchType(objects.ImageMeta),
test.MatchType(objects.Flavor), CONF.libvirt.virt_type)
mock_get_domain.assert_called_once_with(instance)
mock_info.assert_called_once_with()
- mock_detach.assert_called_once_with(expected.to_xml(),
- flags=expected_flags)
+ mock_detach.assert_has_calls(
+ [
+ mock.call(
+ expected.to_xml(),
+ flags=fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG),
+ mock.call(
+ expected.to_xml(),
+ flags=fakelibvirt.VIR_DOMAIN_AFFECT_LIVE),
+ ])
mock_get_network_info.assert_called_once_with()
def test_detach_interface_guest_set_metadata(self):
@@ -23165,7 +23257,6 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
CONF.libvirt.virt_type, instance, image_meta)
cfg = self.drvr._get_guest_config(
instance, network_info, image_meta, disk_info)
- mock_wait_for_detach = mock.Mock()
config_meta = vconfig.LibvirtConfigGuestMetaNovaInstance()
with test.nested(
@@ -23178,15 +23269,15 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
mock.patch.object(guest, 'get_power_state'),
mock.patch.object(
instance, 'get_network_info', return_value=network_info),
- mock.patch.object(guest,
- 'detach_device_with_retry', return_value=mock_wait_for_detach),
+ mock.patch.object(
+ self.drvr, '_detach_with_retry'),
mock.patch.object(
self.drvr, '_get_guest_config_meta', return_value=config_meta),
mock.patch.object(guest, 'set_metadata')
) as (
mock_get_guest, mock_get_config, mock_get_interface_by_cfg,
mock_get_power_state, mock_get_network_info,
- mock_detach_device_with_retry, mock_get_guest_config_meta,
+ mock_detach_with_retry, mock_get_guest_config_meta,
mock_set_metadata
):
self.drvr.detach_interface(self.context, instance, vif)
@@ -23196,15 +23287,606 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
test.MatchType(objects.Flavor), CONF.libvirt.virt_type)
mock_get_interface_by_cfg.assert_called_once_with(cfg)
mock_get_power_state.assert_called_once_with(self.drvr._host)
- mock_detach_device_with_retry.assert_called_once_with(
- guest.get_interface_by_cfg, cfg, live=False,
- alternative_device_name=None)
- mock_wait_for_detach.assert_called_once_with()
+ mock_detach_with_retry.assert_called_once_with(
+ guest, instance.uuid, mock.ANY, live=False,
+ device_name=None)
mock_get_network_info.assert_called_once_with()
mock_get_guest_config_meta.assert_called_once_with(
instance, network_info[1:])
mock_set_metadata.assert_called_once_with(config_meta)
+ def test__detach_with_retry_persistent_success(self):
+ """Test detach only from the persistent domain"""
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ mock_guest.has_persistent_configuration.return_value = True
+
+ mock_dev = mock.Mock(spec=vconfig.LibvirtConfigGuestDisk)
+ mock_dev.alias = 'virtio-disk1'
+
+ mock_get_device_conf_func = mock.Mock(
+ # the first call is to get the device from the persistent domain
+ # the second call is to double check that the dev is gone from the
+ # persistent domain
+ side_effect=[
+ mock_dev,
+ None,
+ ]
+ )
+
+ drvr._detach_with_retry(
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=False,
+ )
+
+ mock_guest.has_persistent_configuration.assert_called_once_with()
+ mock_get_device_conf_func.assert_has_calls(
+ [
+ mock.call(from_persistent_config=True),
+ mock.call(from_persistent_config=True),
+ ])
+ mock_guest.detach_device.assert_called_once_with(
+ mock_dev,
+ persistent=True,
+ live=False)
+ # check that the internal event handling is cleaned up
+ self.assertEqual(set(), drvr._device_event_handler._waiters)
+
+ def test__detach_with_retry_live_success(self):
+ """Test detach only from the live domain"""
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ mock_guest.has_persistent_configuration.return_value = False
+
+ mock_dev = mock.Mock(spec=vconfig.LibvirtConfigGuestDisk)
+ mock_dev.alias = 'virtio-disk1'
+
+ mock_get_device_conf_func = mock.Mock(
+ # the first call is to get the device from the live domain
+ # the second call is to double check that the dev is gone from the
+ # live domain
+ side_effect=[
+ mock_dev,
+ None,
+ ]
+ )
+ # simulate that libvirt sends an event
+ mock_guest.detach_device.side_effect = (
+ lambda dev, persistent, live: drvr.emit_event(
+ libvirtevent.DeviceRemovedEvent(uuids.instance_uuid, dev.alias)
+ ))
+
+ drvr._detach_with_retry(
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=True,
+ )
+
+ mock_guest.has_persistent_configuration.assert_called_once_with()
+ mock_get_device_conf_func.assert_has_calls(
+ [
+ mock.call(),
+ mock.call(),
+ ])
+ mock_guest.detach_device.assert_called_once_with(
+ mock_dev,
+ persistent=False,
+ live=True)
+ # check that the internal event handling is cleaned up
+ self.assertEqual(set(), drvr._device_event_handler._waiters)
+
+ def test__detach_with_retry_persistent_and_live_success(self):
+ """Test detach both from the persistent and from the live domains"""
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ mock_guest.has_persistent_configuration.return_value = True
+
+ mock_dev = mock.Mock(spec=vconfig.LibvirtConfigGuestDisk)
+ mock_dev.alias = 'virtio-disk1'
+
+ mock_get_device_conf_func = mock.Mock(
+ # the first call is to get the device from the persistent domain
+ # the second call is to get the device from the live domain
+ # the third call is to double check that the dev is gone from the
+ # persistent domain
+ # the fourth call is to double check that the dev is gone from the
+ # live domain
+ side_effect=[
+ mock_dev,
+ mock_dev,
+ None,
+ None,
+ ]
+ )
+
+ # simulate that libvirt sends an event for the live detach call
+ # for the persistent detach call no side effect needs to be simulated
+ def fake_detach(dev, persistent, live):
+ if live:
+ drvr.emit_event(libvirtevent.DeviceRemovedEvent(
+ uuids.instance_uuid, dev.alias))
+
+ mock_guest.detach_device.side_effect = fake_detach
+
+ drvr._detach_with_retry(
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=True,
+ )
+
+ mock_guest.has_persistent_configuration.assert_called_once_with()
+ mock_get_device_conf_func.assert_has_calls(
+ [
+ mock.call(from_persistent_config=True),
+ mock.call(),
+ mock.call(from_persistent_config=True),
+ mock.call(),
+ ])
+ # we detach from the persistent config first and then from the live
+ mock_guest.detach_device.assert_has_calls(
+ [
+ mock.call(mock_dev, persistent=True, live=False),
+ mock.call(mock_dev, persistent=False, live=True),
+ ])
+ # check that the internal event handling is cleaned up
+ self.assertEqual(set(), drvr._device_event_handler._waiters)
+
+ def test__detach_with_retry_persistent_but_guest_is_not(self):
+ """Test the case where a persistent-only detach is requested but the
+ guest is not persistent.
+ """
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ mock_guest.has_persistent_configuration.return_value = False
+
+ # we expect that no device query or actual detach happens.
+ mock_get_device_conf_func = mock.NonCallableMock()
+ mock_guest.detach_device = mock.NonCallableMock()
+
+ drvr._detach_with_retry(
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=False,
+ )
+
+ mock_guest.has_persistent_configuration.assert_called_once_with()
+
+ def test__detach_with_retry_persistent_dev_not_found(self):
+ """Tests that an exception is raised if a persistent detach is requested,
+ the domain is persistent but the device is not found.
+ """
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ mock_guest.has_persistent_configuration.return_value = True
+
+ # we simulate that the device does not exist in the domain even
+ # before we try to detach
+ mock_get_device_conf_func = mock.Mock(return_value=None)
+
+ self.assertRaises(
+ exception.DeviceNotFound,
+ drvr._detach_with_retry,
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=False,
+ )
+
+ mock_get_device_conf_func.assert_called_once_with(
+ from_persistent_config=True)
+ mock_guest.detach_device.assert_not_called()
+
+ def test__detach_with_retry_live_dev_not_found(self):
+ """Tests that an exception is raised if a live detach is requested,
+ but the device is not found in the live domain.
+ """
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ mock_guest.has_persistent_configuration.return_value = False
+
+ # we simulate that the device does not exist in the domain even
+ # before we try to detach
+ mock_get_device_conf_func = mock.Mock(return_value=None)
+
+ self.assertRaises(
+ exception.DeviceNotFound,
+ drvr._detach_with_retry,
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=True,
+ )
+
+ mock_get_device_conf_func.assert_called_once_with()
+ mock_guest.detach_device.assert_not_called()
+
+ def test__detach_with_retry_async_fail(self):
+ """Test that libvirt sends error event during detach"""
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ # for simplicity do a live-only detach
+ mock_guest.has_persistent_configuration.return_value = False
+
+ mock_dev = mock.Mock(spec=vconfig.LibvirtConfigGuestDisk)
+ mock_dev.alias = 'virtio-disk1'
+
+ mock_get_device_conf_func = mock.Mock(return_value=mock_dev)
+
+ # simulate that libvirt sends a failure event
+ mock_guest.detach_device.side_effect = (
+ lambda dev, persistent, live: drvr.emit_event(
+ libvirtevent.DeviceRemovalFailedEvent(
+ uuids.instance_uuid, dev.alias)))
+
+ self.assertRaises(
+ exception.DeviceDetachFailed,
+ drvr._detach_with_retry,
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=True,
+ )
+
+ mock_guest.has_persistent_configuration.assert_called_once_with()
+ self.assertEqual(1, mock_get_device_conf_func.call_count)
+ mock_guest.detach_device.assert_called_once_with(
+ mock_dev, persistent=False, live=True)
+ # check that the internal event handling is cleaned up
+ self.assertEqual(set(), drvr._device_event_handler._waiters)
+
+ @mock.patch('threading.Event.wait')
+ def test__detach_with_retry_timeout_retry_succeeds(self, mock_event_wait):
+ """Test that a live detach times out while waiting for the libvirt
+ event but then the retry succeeds.
+ """
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ # for simplycity do a live only detach
+ mock_guest.has_persistent_configuration.return_value = False
+
+ mock_dev = mock.Mock(spec=vconfig.LibvirtConfigGuestDisk)
+ mock_dev.alias = 'virtio-disk1'
+
+ mock_get_device_conf_func = mock.Mock(
+ # the first call is to get the device from the live domain
+ # the second call is to double check that the dev is gone from the
+ # live domain after the first detach attempt
+ # the third call is to double check that the dev is gone from the
+ # live domain after the second detach attempt
+ side_effect=[
+ mock_dev,
+ mock_dev,
+ None,
+ ]
+ )
+ # By mocking threading.Event.wait we prevent the test from waiting
+ # until the timeout happens, and by returning False first we simulate
+ # to the caller that the wait returned not because the event is set but
+ # because a timeout happened. Then during the retry we return True
+ # signalling that the event is set, i.e. the libvirt event the caller
+ # is waiting for has been received
+ mock_event_wait.side_effect = [False, True]
+
+ drvr._detach_with_retry(
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=True,
+ )
+
+ mock_guest.has_persistent_configuration.assert_called_once_with()
+ # the 3 calls are explained above where the return value is defined
+ self.assertEqual(3, mock_get_device_conf_func.call_count)
+ mock_guest.detach_device.assert_has_calls(
+ [
+ mock.call(mock_dev, persistent=False, live=True),
+ mock.call(mock_dev, persistent=False, live=True),
+ ]
+ )
+ # check that the internal event handling is cleaned up
+ self.assertEqual(set(), drvr._device_event_handler._waiters)
+
+ @mock.patch('threading.Event.wait')
+ def test__detach_with_retry_timeout_retry_unplug_in_progress(
+ self, mock_event_wait
+ ):
+ """Test that a live detach times out while waiting for the libvirt
+ event but then the retry gets a unplug already in progress error from
+ libvirt, which it ignores, then the detach finishes and the event is
+ received.
+ """
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ # for simplicity do a live only detach
+ mock_guest.has_persistent_configuration.return_value = False
+
+ mock_dev = mock.Mock(spec=vconfig.LibvirtConfigGuestDisk)
+ mock_dev.alias = 'virtio-disk1'
+
+ mock_get_device_conf_func = mock.Mock(
+ # the first call is to get the device from the live domain
+ # the second call is to double check that the dev is gone from the
+ # live domain after the first detach attempt
+ # the third call is to double check that the dev is gone from the
+ # live domain after the second detach attempt
+ side_effect=[
+ mock_dev,
+ mock_dev,
+ None,
+ ]
+ )
+ # By mocking threading.Event.wait we prevent the test from waiting
+ # until the timeout happens, and by returning False first we simulate
+ # to the caller that the wait returned not because the event is set but
+ # because a timeout happened. Then during the retry we return True
+ # signalling that the event is set, i.e. the libvirt event the caller
+ # is waiting for has been received
+ mock_event_wait.side_effect = [False, True]
+
+ # there will be two detach attempts
+ # 1) simulate that synchronous parts of the detach succeeds
+ # 2) return an error as the previous detach attempt is still ongoing
+ def fake_detach_device(dev, persistent, live):
+ if mock_guest.detach_device.call_count == 0:
+ return
+ else:
+ # In reality these events are in reverse order. However
+ # simulating that would require parallelism in the test. From
+ # the perspective of the code under test the call order does
+ # not matter as the call to detach_device is synchronous.
+ drvr.emit_event(
+ libvirtevent.DeviceRemovedEvent(
+ uuids.instance_uuid, dev.alias))
+ raise fakelibvirt.make_libvirtError(
+ fakelibvirt.libvirtError,
+ msg='error',
+ error_message='already in the process of unplug',
+ error_code=fakelibvirt.VIR_ERR_INTERNAL_ERROR)
+
+ mock_guest.detach_device.side_effect = fake_detach_device
+
+ drvr._detach_with_retry(
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=True,
+ )
+
+ mock_guest.has_persistent_configuration.assert_called_once_with()
+ # the 3 calls are explained above where the return value is defined
+ self.assertEqual(3, mock_get_device_conf_func.call_count)
+ mock_guest.detach_device.assert_has_calls(
+ [
+ mock.call(mock_dev, persistent=False, live=True),
+ mock.call(mock_dev, persistent=False, live=True),
+ ]
+ )
+ # check that the internal event handling is cleaned up
+ self.assertEqual(set(), drvr._device_event_handler._waiters)
+
+ @mock.patch('threading.Event.wait')
+ def test__detach_with_retry_timeout_run_out_of_retries(
+ self, mock_event_wait
+ ):
+ """Test that a live detach times out while waiting for the libvirt
+ event at every attempt so the driver runs out of retry attempts.
+ """
+ # decrease the number to simplify the test
+ self.flags(group='libvirt', device_detach_attempts=2)
+
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ # for simplicity do a live-only detach
+ mock_guest.has_persistent_configuration.return_value = False
+
+ mock_dev = mock.Mock(spec=vconfig.LibvirtConfigGuestDisk)
+ mock_dev.alias = 'virtio-disk1'
+
+ mock_get_device_conf_func = mock.Mock(return_value=mock_dev)
+
+ # By mocking threading.Event.wait we prevent the test from waiting
+ # until the timeout happens, and by returning False we simulate to the
+ # caller that the wait returned not because the event is set but
+ # because a timeout happened.
+ mock_event_wait.return_value = False
+
+ self.assertRaises(
+ exception.DeviceDetachFailed,
+ drvr._detach_with_retry,
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=True,
+ )
+
+ mock_guest.has_persistent_configuration.assert_called_once_with()
+ # the first call is to get the dev we detach, then there is one call
+ # after each retry attempt to check if the device is gone
+ self.assertEqual(3, mock_get_device_conf_func.call_count)
+ mock_guest.detach_device.assert_has_calls(
+ [
+ mock.call(mock_dev, persistent=False, live=True),
+ mock.call(mock_dev, persistent=False, live=True),
+ ]
+ )
+ # check that the internal event handling is cleaned up
+ self.assertEqual(set(), drvr._device_event_handler._waiters)
+
+ def test__detach_with_retry_libvirt_reports_not_found_live_detach_done(
+ self
+ ):
+ """Test that libvirt reports that the device is not found in the
+ persistent domain but as live detach is also requested the original
+ error is just logged and the live detach is attempted.
+ """
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ mock_guest.has_persistent_configuration.return_value = True
+
+ mock_dev = mock.Mock(spec=vconfig.LibvirtConfigGuestDisk)
+ mock_dev.alias = 'virtio-disk1'
+
+ mock_get_device_conf_func = mock.Mock(
+ # the first call is to get the device from the persistent domain
+ # the second call is to get the device from the live domain
+ # the third call is to double check that the dev is gone from the
+ # live domain
+ # NOTE(gibi): as the persistent detach fails with exception the
+ # driver does not double check that the device is removed but
+ # simply move to the live detach
+ side_effect=[
+ mock_dev,
+ mock_dev,
+ None,
+ ]
+ )
+
+ # there will be two detach attempts:
+ # 1) with persistent=True: simulate that libvirt fails synchronously
+ # signalling that the device is not found.
+ # 2) with persistent=False: simulate that detach succeeds by emitting
+ # the libvirt event
+ def fake_detach_device(dev, persistent, live):
+ if persistent:
+ raise fakelibvirt.make_libvirtError(
+ fakelibvirt.libvirtError,
+ msg='error',
+ error_code=fakelibvirt.VIR_ERR_DEVICE_MISSING)
+ else:
+ drvr.emit_event(
+ libvirtevent.DeviceRemovedEvent(
+ uuids.instance_uuid, dev.alias))
+
+ mock_guest.detach_device.side_effect = fake_detach_device
+
+ drvr._detach_with_retry(
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=True,
+ )
+
+ mock_guest.has_persistent_configuration.assert_called_once_with()
+ mock_get_device_conf_func.assert_has_calls(
+ [
+ mock.call(from_persistent_config=True),
+ mock.call(),
+ mock.call(),
+ ])
+ mock_guest.detach_device.assert_has_calls(
+ [
+ mock.call(mock_dev, persistent=True, live=False),
+ mock.call(mock_dev, persistent=False, live=True),
+ ]
+ )
+ # check that the internal event handling is cleaned up
+ self.assertEqual(set(), drvr._device_event_handler._waiters)
+
+ def test__detach_with_retry_libvirt_reports_not_found_give_up(self):
+ """Test that libvirt reports that the device is not found in the
+ persistent domain and as live detach is not requested the driver simply
+ bubbles up the exception.
+ """
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ mock_guest.has_persistent_configuration.return_value = True
+
+ mock_dev = mock.Mock(spec=vconfig.LibvirtConfigGuestDisk)
+ mock_dev.alias = 'virtio-disk1'
+
+ mock_get_device_conf_func = mock.Mock(
+ # the first call is to get the device from the persistent domain
+ # NOTE(gibi): as the persistent detach fails with exception the
+ # driver does not double check that the device is removed but
+ # simply gives up
+ side_effect=[
+ mock_dev,
+ ]
+ )
+
+ mock_guest.detach_device.side_effect = fakelibvirt.make_libvirtError(
+ fakelibvirt.libvirtError,
+ msg='error',
+ error_code=fakelibvirt.VIR_ERR_DEVICE_MISSING)
+
+ self.assertRaises(
+ exception.DeviceNotFound,
+ drvr._detach_with_retry,
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=False,
+ )
+
+ mock_guest.has_persistent_configuration.assert_called_once_with()
+ mock_get_device_conf_func.assert_called_once_with(
+ from_persistent_config=True)
+ mock_guest.detach_device.assert_called_once_with(
+ mock_dev, persistent=True, live=False)
+
+ def test__detach_with_retry_other_sync_libvirt_error(self):
+ """Test that libvirt reports non device related error during detach
+ from persistent config which causes that the driver gives up
+ regardless of the fact that live detach also requested.
+ """
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ mock_guest.has_persistent_configuration.return_value = True
+
+ mock_dev = mock.Mock(spec=vconfig.LibvirtConfigGuestDisk)
+ mock_dev.alias = 'virtio-disk1'
+
+ mock_get_device_conf_func = mock.Mock(
+ # the first call is to get the device from the persistent domain
+ # the second call is to get the device from the live domain
+ side_effect=[
+ mock_dev,
+ mock_dev,
+ ]
+ )
+ mock_guest.detach_device.side_effect = fakelibvirt.make_libvirtError(
+ fakelibvirt.libvirtError,
+ msg='error',
+ error_code=fakelibvirt.VIR_ERR_NO_DOMAIN)
+
+ self.assertRaises(
+ fakelibvirt.libvirtError,
+ drvr._detach_with_retry,
+ mock_guest,
+ uuids.instance_uuid,
+ mock_get_device_conf_func,
+ device_name='vdb',
+ live=True,
+ )
+
+ mock_guest.has_persistent_configuration.assert_called_once_with()
+ mock_get_device_conf_func.assert_has_calls(
+ [
+ mock.call(from_persistent_config=True),
+ mock.call(),
+ ])
+ # NOTE(gibi): the driver does not attempt a live detach even though
+ # it was requested
+ mock_guest.detach_device.assert_called_once_with(
+ mock_dev, persistent=True, live=False)
+
@mock.patch('nova.objects.block_device.BlockDeviceMapping.save',
new=mock.Mock())
@mock.patch('nova.objects.image_meta.ImageMeta.from_image_ref')
@@ -27326,19 +28008,22 @@ class LibvirtDeviceRemoveEventTestCase(test.NoDBTestCase):
super().setUp()
self.useFixture(fakelibvirt.FakeLibvirtFixture())
- @mock.patch.object(libvirt_driver.LOG, 'debug')
+ @mock.patch.object(libvirt_driver.LOG, 'warning')
@mock.patch('nova.virt.driver.ComputeDriver.emit_event')
@ddt.data(
libvirtevent.DeviceRemovedEvent,
libvirtevent.DeviceRemovalFailedEvent)
def test_libvirt_device_removal_events(
- self, event_type, mock_base_handles, mock_debug
+ self, event_type, mock_base_handles, mock_warning
):
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
event = event_type(uuid=uuids.event, dev=mock.sentinel.dev_alias)
drvr.emit_event(event)
mock_base_handles.assert_not_called()
- mock_debug.assert_not_called()
+ self.assertEqual(1, len(mock_warning.mock_calls))
+ self.assertIn(
+ "the driver is not waiting for it; ignored.",
+ mock_warning.mock_calls[0][1][0])
class AsyncDeviceEventsHandlerTestCase(test.NoDBTestCase):
diff --git a/nova/tests/unit/virt/libvirt/test_guest.py b/nova/tests/unit/virt/libvirt/test_guest.py
index f09c8ae71db1..87b189882581 100644
--- a/nova/tests/unit/virt/libvirt/test_guest.py
+++ b/nova/tests/unit/virt/libvirt/test_guest.py
@@ -21,7 +21,6 @@ from oslo_service import fixture as service_fixture
from oslo_utils import encodeutils
from nova import context
-from nova import exception
from nova import test
from nova.tests.unit.virt.libvirt import fakelibvirt
from nova.virt.libvirt import config as vconfig
@@ -213,212 +212,6 @@ class GuestTestCase(test.NoDBTestCase):
"", flags=(fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG |
fakelibvirt.VIR_DOMAIN_AFFECT_LIVE))
- def test_detach_device_with_retry_from_transient_domain(self):
- conf = mock.Mock(spec=vconfig.LibvirtConfigGuestDevice)
- conf.to_xml.return_value = ""
- get_config = mock.Mock()
- get_config.side_effect = [conf, conf, conf, None, None]
- dev_path = "/dev/vdb"
- self.domain.isPersistent.return_value = False
- retry_detach = self.guest.detach_device_with_retry(
- get_config, dev_path, live=True, inc_sleep_time=.01)
- self.domain.detachDeviceFlags.assert_called_once_with(
- "", flags=fakelibvirt.VIR_DOMAIN_AFFECT_LIVE)
- self.domain.detachDeviceFlags.reset_mock()
- retry_detach()
- self.assertEqual(1, self.domain.detachDeviceFlags.call_count)
-
- def test_detach_device_with_retry_detach_success(self):
- conf = mock.Mock(spec=vconfig.LibvirtConfigGuestDevice)
- conf.to_xml.return_value = ""
- get_config = mock.Mock()
- # Force multiple retries of detach
- get_config.side_effect = [conf, conf, conf, conf, conf, None, None]
- dev_path = "/dev/vdb"
- self.domain.isPersistent.return_value = True
-
- retry_detach = self.guest.detach_device_with_retry(
- get_config, dev_path, live=True, inc_sleep_time=.01)
- # Ensure we've only done the initial detach call
- self.domain.detachDeviceFlags.assert_called_once_with(
- "", flags=(fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG |
- fakelibvirt.VIR_DOMAIN_AFFECT_LIVE))
-
- get_config.assert_called_with(dev_path)
-
- # Some time later, we can do the wait/retry to ensure detach succeeds
- self.domain.detachDeviceFlags.reset_mock()
- retry_detach()
- # Should have two retries before we pretend device is detached
- self.assertEqual(2, self.domain.detachDeviceFlags.call_count)
-
- def test_detach_device_with_retry_detach_failure(self):
- conf = mock.Mock(spec=vconfig.LibvirtConfigGuestDevice)
- conf.to_xml.return_value = ""
- # Continue to return some value for the disk config
- get_config = mock.Mock(return_value=conf)
- self.domain.isPersistent.return_value = True
-
- retry_detach = self.guest.detach_device_with_retry(
- get_config, "/dev/vdb", live=True, inc_sleep_time=.01,
- max_retry_count=3)
- # Ensure we've only done the initial detach call
- self.domain.detachDeviceFlags.assert_called_once_with(
- "", flags=(fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG |
- fakelibvirt.VIR_DOMAIN_AFFECT_LIVE))
-
- # Some time later, we can do the wait/retry to ensure detach
- self.domain.detachDeviceFlags.reset_mock()
- # Should hit max # of retries
- self.assertRaises(exception.DeviceDetachFailed, retry_detach)
- self.assertEqual(4, self.domain.detachDeviceFlags.call_count)
-
- def test_detach_device_with_retry_device_not_found(self):
- get_config = mock.Mock(return_value=None)
- self.domain.isPersistent.return_value = True
- ex = self.assertRaises(
- exception.DeviceNotFound, self.guest.detach_device_with_retry,
- get_config, "/dev/vdb", live=True)
- self.assertIn("/dev/vdb", str(ex))
-
- def test_detach_device_with_retry_device_not_found_alt_name(self):
- """Tests to make sure we use the alternative name in errors."""
- get_config = mock.Mock(return_value=None)
- self.domain.isPersistent.return_value = True
- ex = self.assertRaises(
- exception.DeviceNotFound, self.guest.detach_device_with_retry,
- get_config, mock.sentinel.device, live=True,
- alternative_device_name='foo')
- self.assertIn('foo', str(ex))
-
- @mock.patch.object(libvirt_guest.Guest, "detach_device")
- def _test_detach_device_with_retry_second_detach_failure(
- self, mock_detach, error_code=None, error_message=None,
- supports_device_missing=False):
- # This simulates a retry of the transient/live domain detach
- # failing because the device is not found
- conf = mock.Mock(spec=vconfig.LibvirtConfigGuestDevice)
- conf.to_xml.return_value = ""
- self.domain.isPersistent.return_value = True
-
- get_config = mock.Mock(return_value=conf)
- fake_device = "vdb"
- fake_exc = fakelibvirt.make_libvirtError(
- fakelibvirt.libvirtError, "",
- error_message=error_message,
- error_code=error_code,
- error_domain=fakelibvirt.VIR_FROM_DOMAIN)
- mock_detach.side_effect = [None, fake_exc]
- retry_detach = self.guest.detach_device_with_retry(
- get_config, fake_device, live=True,
- inc_sleep_time=.01, max_retry_count=3)
- # Some time later, we can do the wait/retry to ensure detach
- self.assertRaises(exception.DeviceNotFound, retry_detach)
- # Check that the save_and_reraise_exception context manager didn't log
- # a traceback when the libvirtError was caught and DeviceNotFound was
- # raised.
- self.assertNotIn('Original exception being dropped',
- self.stdlog.logger.output)
-
- def test_detach_device_with_retry_second_detach_device_missing(self):
- self._test_detach_device_with_retry_second_detach_failure(
- error_code=fakelibvirt.VIR_ERR_DEVICE_MISSING,
- error_message="device not found: disk vdb not found",
- supports_device_missing=True)
-
- def _test_detach_device_with_retry_first_detach_failure(
- self, error_code=None, error_message=None,
- supports_device_missing=False):
- # This simulates a persistent or live domain detach failing because the
- # device is not found during the first attempt to detach the device.
- # We should still attempt to detach the device from the live config if
- # the detach from persistent failed OR we should retry the detach from
- # the live config if the first detach from live config failed.
- # Note that the side effects in this test case [fake_exc, None] could
- # not happen in real life if the first detach failed because the detach
- # from live raised not found. In real life, the second attempt to
- # detach from live would raise not found again because the device is
- # not present. The purpose of this test is to verify that we try to
- # detach a second time if the first detach fails, so we are OK with the
- # unrealistic side effects for detach from live failing the first time.
- conf = mock.Mock(spec=vconfig.LibvirtConfigGuestDevice)
- conf.to_xml.return_value = ""
- self.domain.isPersistent.return_value = True
-
- get_config = mock.Mock()
- # Simulate an inactive or live detach attempt which fails (not found)
- # followed by a live config detach attempt that is successful
- get_config.side_effect = [conf, conf, conf, None, None]
- fake_device = "vdb"
- fake_exc = fakelibvirt.make_libvirtError(
- fakelibvirt.libvirtError, "",
- error_message=error_message,
- error_code=error_code,
- error_domain=fakelibvirt.VIR_FROM_DOMAIN)
- # Detach from persistent or live raises not found, detach from live
- # succeeds afterward
- self.domain.detachDeviceFlags.side_effect = [fake_exc, None]
- retry_detach = self.guest.detach_device_with_retry(get_config,
- fake_device, live=True, inc_sleep_time=.01, max_retry_count=3)
- # We should have tried to detach from the persistent domain
- self.domain.detachDeviceFlags.assert_called_once_with(
- "", flags=(fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG |
- fakelibvirt.VIR_DOMAIN_AFFECT_LIVE))
- # During the retry detach, should detach from the live domain
- self.domain.detachDeviceFlags.reset_mock()
- retry_detach()
- # We should have tried to detach from the live domain
- self.domain.detachDeviceFlags.assert_called_once_with(
- "", flags=fakelibvirt.VIR_DOMAIN_AFFECT_LIVE)
-
- def test_detach_device_with_retry_first_detach_device_missing(self):
- self._test_detach_device_with_retry_first_detach_failure(
- error_code=fakelibvirt.VIR_ERR_DEVICE_MISSING,
- error_message="device not found: disk vdb not found",
- supports_device_missing=True)
-
- def test_detach_device_with_already_in_process_of_unplug_error(self):
- # Assert that DeviceNotFound is raised when encountering
- # https://bugzilla.redhat.com/show_bug.cgi?id=1878659
- # This is raised as QEMU returns a VIR_ERR_INTERNAL_ERROR when
- # a request to device_del is made while another is about to complete.
-
- self.domain.isPersistent.return_value = True
- conf = mock.Mock(spec=vconfig.LibvirtConfigGuestDevice)
- conf.to_xml.return_value = ""
-
- existing_unplug_exc = fakelibvirt.make_libvirtError(
- fakelibvirt.libvirtError, "",
- error_message='device vdb is already in the process of unplug',
- error_code=fakelibvirt.VIR_ERR_INTERNAL_ERROR,
- error_domain=fakelibvirt.VIR_FROM_DOMAIN
- )
- device_missing_exc = fakelibvirt.make_libvirtError(
- fakelibvirt.libvirtError, "",
- error_message='device not found: disk vdb not found',
- error_code=fakelibvirt.VIR_ERR_DEVICE_MISSING,
- error_domain=fakelibvirt.VIR_FROM_DOMAIN
- )
-
- # Raise VIR_ERR_INTERNAL_ERROR on the second call before raising
- # VIR_ERR_DEVICE_MISSING to mock the first call successfully detaching
- # the device asynchronously.
- self.domain.detachDeviceFlags.side_effect = [
- None,
- existing_unplug_exc,
- device_missing_exc
- ]
-
- retry_detach = self.guest.detach_device_with_retry(
- mock.Mock(return_value=conf),
- 'vdb',
- live=True,
- inc_sleep_time=.01
- )
-
- # Assert that we raise exception.DeviceNotFound
- self.assertRaises(exception.DeviceNotFound, retry_detach)
-
def test_get_xml_desc(self):
self.guest.get_xml_desc()
self.domain.XMLDesc.assert_called_once_with(flags=0)
diff --git a/nova/tests/unit/virt/test_virt_drivers.py b/nova/tests/unit/virt/test_virt_drivers.py
index 54c587bfdf70..9d3ef54e8975 100644
--- a/nova/tests/unit/virt/test_virt_drivers.py
+++ b/nova/tests/unit/virt/test_virt_drivers.py
@@ -416,6 +416,7 @@ class _VirtDriverTestCase(_FakeDriverBackendTestCase):
self.assertEqual(storage_ip, result['ip'])
@catch_notimplementederror
+ @mock.patch('threading.Event.wait', new=mock.Mock())
@mock.patch.object(libvirt.driver.LibvirtDriver, '_build_device_metadata',
return_value=objects.InstanceDeviceMetadata())
def test_attach_detach_volume(self, _):
@@ -452,6 +453,7 @@ class _VirtDriverTestCase(_FakeDriverBackendTestCase):
'/dev/sda', 2))
@catch_notimplementederror
+ @mock.patch('threading.Event.wait', new=mock.Mock())
@mock.patch.object(libvirt.driver.LibvirtDriver, '_build_device_metadata',
return_value=objects.InstanceDeviceMetadata())
def test_attach_detach_different_power_states(self, _):
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index 8ee8757e40cc..747489aeeb62 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -504,6 +504,10 @@ class LibvirtDriver(driver.ComputeDriver):
self.pgpu_type_mapping = collections.defaultdict(str)
self.supported_vgpu_types = self._get_supported_vgpu_types()
+ # Handles ongoing device manipulation in libvirt where we wait for the
+ # events about success or failure.
+ self._device_event_handler = AsyncDeviceEventsHandler()
+
def _discover_vpmems(self, vpmem_conf=None):
"""Discover vpmems on host and configuration.
@@ -1400,6 +1404,9 @@ class LibvirtDriver(driver.ComputeDriver):
def destroy(self, context, instance, network_info, block_device_info=None,
destroy_disks=True):
self._destroy(instance)
+ # NOTE(gibi): if there was device detach in progress then we need to
+ # unblock the waiting threads and clean up.
+ self._device_event_handler.cleanup_waiters(instance.uuid)
self.cleanup(context, instance, network_info, block_device_info,
destroy_disks)
@@ -2189,8 +2196,20 @@ class LibvirtDriver(driver.ComputeDriver):
# These are libvirt specific events handled here on the driver
# level instead of propagating them to the compute manager level
if isinstance(event, libvirtevent.DeviceEvent):
- # TODO(gibi): handle it
- pass
+ had_clients = self._device_event_handler.notify_waiters(event)
+
+ if had_clients:
+ LOG.debug(
+ "Received event %s from libvirt while the driver is "
+ "waiting for it; dispatched.",
+ event,
+ )
+ else:
+ LOG.warning(
+ "Received event %s from libvirt but the driver is not "
+ "waiting for it; ignored.",
+ event,
+ )
else:
LOG.debug(
"Received event %s from libvirt but no handler is "
@@ -2201,6 +2220,321 @@ class LibvirtDriver(driver.ComputeDriver):
# manager
super().emit_event(event)
+ def _detach_with_retry(
+ self,
+ guest: libvirt_guest.Guest,
+ instance_uuid: str,
+ # to properly typehint this param we would need typing.Protocol but
+ # that is only available since python 3.8
+ get_device_conf_func: ty.Callable,
+ device_name: str,
+ live: bool,
+ ) -> None:
+ """Detaches a device from the guest
+
+ If live detach is requested then this call will wait for the libvirt
+ event signalling the end of the detach process.
+
+ If the live detach times out then it will retry the detach. Detach from
+ the persistent config is not retried as it is:
+
+ * synchronous and no event is sent from libvirt
+ * it is always expected to succeed if the device is in the domain
+ config
+
+ :param guest: the guest we are detaching the device from
+ :param instance_uuid: the UUID of the instance we are detaching the
+ device from
+ :param get_device_conf_func: function which returns the configuration
+ for device from the domain, having one optional boolean parameter
+ `from_persistent_config` to select which domain config to query
+ :param device_name: This is the name of the device used solely for
+ error messages. Note that it is not the same as the device alias
+ used by libvirt to identify the device.
+ :param live: bool to indicate whether it affects the guest in running
+ state. If live is True then the device is detached from both the
+ persistent and the live config. If live is False the device only
+ detached from the persistent config.
+ :raises exception.DeviceNotFound: if the device does not exist in the
+ domain even before we try to detach or if libvirt reported that the
+ device is missing from the domain synchronously.
+ :raises exception.DeviceDetachFailed: if libvirt reported error during
+ detaching from the live domain or we timed out waiting for libvirt
+ events and ran out of retries
+ :raises libvirt.libvirtError: for any other errors reported by libvirt
+ synchronously.
+ """
+ persistent = guest.has_persistent_configuration()
+
+ if not persistent and not live:
+ # nothing to do
+ return
+
+ persistent_dev = None
+ if persistent:
+ persistent_dev = get_device_conf_func(from_persistent_config=True)
+
+ live_dev = None
+ if live:
+ live_dev = get_device_conf_func()
+
+ if live and live_dev is None:
+ # caller requested a live detach but device is not present
+ raise exception.DeviceNotFound(device=device_name)
+
+ if not live and persistent_dev is None:
+ # caller requested detach from the persistent domain but device is
+ # not present
+ raise exception.DeviceNotFound(device=device_name)
+
+ if persistent_dev:
+ try:
+ self._detach_from_persistent(
+ guest, instance_uuid, persistent_dev, get_device_conf_func,
+ device_name)
+ except exception.DeviceNotFound:
+ if live:
+ # ignore the error so that we can do the live detach
+ LOG.warning(
+ 'Libvirt reported sync error while detaching '
+ 'device %s from instance %s from the persistent '
+ 'domain config. Ignoring the error to proceed with '
+ 'live detach as that was also requested.',
+ device_name, instance_uuid)
+ else:
+ # if only persistent detach was requested then give up
+ raise
+
+ if live and live_dev:
+ self._detach_from_live_with_retry(
+ guest, instance_uuid, live_dev, get_device_conf_func,
+ device_name)
+
+ def _detach_from_persistent(
+ self,
+ guest: libvirt_guest.Guest,
+ instance_uuid: str,
+ persistent_dev: ty.Union[
+ vconfig.LibvirtConfigGuestDisk,
+ vconfig.LibvirtConfigGuestInterface],
+ get_device_conf_func,
+ device_name: str,
+ ):
+ LOG.debug(
+ 'Attempting to detach device %s from instance %s from '
+ 'the persistent domain config.', device_name, instance_uuid)
+
+ self._detach_sync(
+ persistent_dev, guest, instance_uuid, device_name,
+ persistent=True, live=False)
+
+ # make sure the dev is really gone
+ persistent_dev = get_device_conf_func(
+ from_persistent_config=True)
+ if not persistent_dev:
+ LOG.info(
+ 'Successfully detached device %s from instance %s '
+ 'from the persistent domain config.',
+ device_name, instance_uuid)
+ else:
+ # Based on the libvirt devs this should never happen
+ LOG.warning(
+ 'Failed to detach device %s from instance %s '
+ 'from the persistent domain config. Libvirt did not '
+ 'report any error but the device is still in the '
+ 'config.', device_name, instance_uuid)
+
+ def _detach_from_live_with_retry(
+ self,
+ guest: libvirt_guest.Guest,
+ instance_uuid: str,
+ live_dev: ty.Union[
+ vconfig.LibvirtConfigGuestDisk,
+ vconfig.LibvirtConfigGuestInterface],
+ get_device_conf_func,
+ device_name: str,
+ ):
+ max_attempts = CONF.libvirt.device_detach_attempts
+ for attempt in range(max_attempts):
+ LOG.debug(
+ '(%s/%s): Attempting to detach device %s with device '
+ 'alias %s from instance %s from the live domain config.',
+ attempt + 1, max_attempts, device_name, live_dev.alias,
+ instance_uuid)
+
+ self._detach_from_live_and_wait_for_event(
+ live_dev, guest, instance_uuid, device_name)
+
+ # make sure the dev is really gone
+ live_dev = get_device_conf_func()
+ if not live_dev:
+ LOG.info(
+ 'Successfully detached device %s from instance %s '
+ 'from the live domain config.', device_name, instance_uuid)
+ # we are done
+ return
+
+ LOG.debug(
+ 'Failed to detach device %s with device alias %s from '
+ 'instance %s from the live domain config. Libvirt did not '
+ 'report any error but the device is still in the config.',
+ device_name, live_dev.alias, instance_uuid)
+
+ msg = (
+ 'Run out of retry while detaching device %s with device '
+ 'alias %s from instance %s from the live domain config. '
+ 'Device is still attached to the guest.')
+ LOG.error(msg, device_name, live_dev.alias, instance_uuid)
+ raise exception.DeviceDetachFailed(
+ device=device_name,
+ reason=msg % (device_name, live_dev.alias, instance_uuid))
+
+ def _detach_from_live_and_wait_for_event(
+ self,
+ dev: ty.Union[
+ vconfig.LibvirtConfigGuestDisk,
+ vconfig.LibvirtConfigGuestInterface],
+ guest: libvirt_guest.Guest,
+ instance_uuid: str,
+ device_name: str,
+ ) -> None:
+ """Detaches a device from the live config of the guest and waits for
+ the libvirt event signaling the finish of the detach.
+
+ :param dev: the device configuration to be detached
+ :param guest: the guest we are detaching the device from
+ :param instance_uuid: the UUID of the instance we are detaching the
+ device from
+ :param device_name: This is the name of the device used solely for
+ error messages.
+ :raises exception.DeviceNotFound: if libvirt reported that the device
+ is missing from the domain synchronously.
+ :raises libvirt.libvirtError: for any other errors reported by libvirt
+ synchronously.
+ :raises DeviceDetachFailed: if libvirt sent DeviceRemovalFailedEvent
+ """
+ # So we will issue a detach to libvirt and we will wait for an
+ # event from libvirt about the result. We need to set up the event
+ # handling before the detach to avoid missing the event if libvirt
+ # is really fast
+ # NOTE(gibi): we need to use the alias name of the device as that
+ # is what libvirt will send back to us in the event
+ waiter = self._device_event_handler.create_waiter(
+ instance_uuid, dev.alias,
+ {libvirtevent.DeviceRemovedEvent,
+ libvirtevent.DeviceRemovalFailedEvent})
+ try:
+ self._detach_sync(
+ dev, guest, instance_uuid, device_name, persistent=False,
+ live=True)
+ except Exception:
+ # clean up the libvirt event handler as we failed synchronously
+ self._device_event_handler.delete_waiter(waiter)
+ raise
+
+ LOG.debug(
+ 'Start waiting for the detach event from libvirt for '
+ 'device %s with device alias %s for instance %s',
+ device_name, dev.alias, instance_uuid)
+ # We issued the detach without any exception so we can wait for
+ # a libvirt event to arrive to notify us about the result
+ # NOTE(gibi): we expect that this call will be unblocked by an
+ # incoming libvirt DeviceRemovedEvent or DeviceRemovalFailedEvent
+ event = self._device_event_handler.wait(
+ waiter, timeout=CONF.libvirt.device_detach_timeout)
+
+ if not event:
+ # This should not happen based on information from the libvirt
+ # developers. But it does at least during the cleanup of the
+ # tempest test case
+ # ServerRescueNegativeTestJSON.test_rescued_vm_detach_volume
+ # Log a warning and let the upper layer detect that the device is
+ # still attached and retry
+ LOG.error(
+ 'Waiting for libvirt event about the detach of '
+ 'device %s with device alias %s from instance %s is timed '
+ 'out.', device_name, dev.alias, instance_uuid)
+
+ if isinstance(event, libvirtevent.DeviceRemovalFailedEvent):
+ # Based on the libvirt developers this signals a permanent failure
+ LOG.error(
+ 'Received DeviceRemovalFailedEvent from libvirt for the '
+ 'detach of device %s with device alias %s from instance %s ',
+ device_name, dev.alias, instance_uuid)
+ raise exception.DeviceDetachFailed(
+ device=device_name,
+ reason="DeviceRemovalFailedEvent received from libvirt")
+
+ @staticmethod
+ def _detach_sync(
+ dev: ty.Union[
+ vconfig.LibvirtConfigGuestDisk,
+ vconfig.LibvirtConfigGuestInterface],
+ guest: libvirt_guest.Guest,
+ instance_uuid: str,
+ device_name: str,
+ persistent: bool,
+ live: bool,
+ ):
+ """Detaches a device from the guest without waiting for libvirt events
+
+ It only handles synchronous errors (i.e. exceptions) but does not wait
+ for any event from libvirt.
+
+ :param dev: the device configuration to be detached
+ :param guest: the guest we are detaching the device from
+ :param instance_uuid: the UUID of the instance we are detaching the
+ device from
+ :param device_name: This is the name of the device used solely for
+ error messages.
+ :param live: detach the device from the live domain config only
+ :param persistent: detach the device from the persistent domain config
+ only
+ :raises exception.DeviceNotFound: if libvirt reported that the device
+ is missing from the domain synchronously.
+ :raises libvirt.libvirtError: for any other errors reported by libvirt
+ synchronously.
+ """
+ try:
+ guest.detach_device(dev, persistent=persistent, live=live)
+ except libvirt.libvirtError as ex:
+ code = ex.get_error_code()
+ msg = ex.get_error_message()
+ if code == libvirt.VIR_ERR_DEVICE_MISSING:
+ LOG.debug(
+ 'Libvirt failed to detach device %s from instance %s '
+ 'synchronously (persistent=%s, live=%s) with error: %s.',
+ device_name, instance_uuid, persistent, live, str(ex))
+ raise exception.DeviceNotFound(device=device_name) from ex
+
+ # NOTE(lyarwood): https://bugzilla.redhat.com/1878659
+ # Ignore this known QEMU bug for the time being allowing
+ # our retry logic to handle it.
+ # NOTE(gibi): This can only happen in case of detaching from the
+ # live domain as we never retry a detach from the persistent
+ # domain so we cannot hit an already running detach there.
+ # In case of detaching from the live domain this error can happen
+ # if the caller timed out during the first detach attempt then saw
+ # that the device is still attached and therefore looped over and
+ retried the detach. In this case the previous attempt stopped
+ # waiting for the libvirt event. Also libvirt reports that there is
+ # a detach ongoing, so the current attempt expects that a
+ # libvirt event will be still emitted. Therefore we simply return
+ # from here. Then the caller will wait for such event.
+ if (code == libvirt.VIR_ERR_INTERNAL_ERROR and msg and
+ 'already in the process of unplug' in msg
+ ):
+ LOG.debug(
+ 'Ignoring QEMU rejecting our request to detach device %s '
+ 'from instance %s as it is caused by a previous request '
+ 'still being in progress.', device_name, instance_uuid)
+ return
+
+ LOG.warning(
+ 'Unexpected libvirt error while detaching device %s from '
+ 'instance %s: %s', device_name, instance_uuid, str(ex))
+ raise
+
def detach_volume(self, context, connection_info, instance, mountpoint,
encryption=None):
disk_dev = mountpoint.rpartition("/")[2]
@@ -2213,10 +2547,14 @@ class LibvirtDriver(driver.ComputeDriver):
# detaching any attached encryptors or disconnecting the underlying
# volume in _disconnect_volume. Otherwise, the encryptor or volume
# driver may report that the volume is still in use.
- wait_for_detach = guest.detach_device_with_retry(
- guest.get_disk, disk_dev, live=live)
- wait_for_detach()
-
+ get_dev = functools.partial(guest.get_disk, disk_dev)
+ self._detach_with_retry(
+ guest,
+ instance.uuid,
+ get_dev,
+ device_name=disk_dev,
+ live=live
+ )
except exception.InstanceNotFound:
# NOTE(zhaoqin): If the instance does not exist, _lookup_by_name()
# will throw InstanceNotFound exception. Need to
@@ -2423,12 +2761,14 @@ class LibvirtDriver(driver.ComputeDriver):
state = guest.get_power_state(self._host)
live = state in (power_state.RUNNING, power_state.PAUSED)
- # Now we are going to loop until the interface is detached or we
- # timeout.
- wait_for_detach = guest.detach_device_with_retry(
- guest.get_interface_by_cfg, cfg, live=live,
- alternative_device_name=self.vif_driver.get_vif_devname(vif))
- wait_for_detach()
+ get_dev = functools.partial(guest.get_interface_by_cfg, cfg)
+ self._detach_with_retry(
+ guest,
+ instance.uuid,
+ get_dev,
+ device_name=self.vif_driver.get_vif_devname(vif),
+ live=live,
+ )
except exception.DeviceDetachFailed:
# We failed to detach the device even with the retry loop, so let's
# dump some debug information to the logs before raising back up.
diff --git a/nova/virt/libvirt/guest.py b/nova/virt/libvirt/guest.py
index 66e06896c69e..0831010117e4 100644
--- a/nova/virt/libvirt/guest.py
+++ b/nova/virt/libvirt/guest.py
@@ -31,7 +31,6 @@ import time
from lxml import etree
from oslo_log import log as logging
-from oslo_service import loopingcall
from oslo_utils import encodeutils
from oslo_utils import excutils
from oslo_utils import importutils
@@ -399,109 +398,6 @@ class Guest(object):
devs.append(dev)
return devs
- def detach_device_with_retry(self, get_device_conf_func, device, live,
- max_retry_count=7, inc_sleep_time=10,
- max_sleep_time=60,
- alternative_device_name=None):
- """Detaches a device from the guest. After the initial detach request,
- a function is returned which can be used to ensure the device is
- successfully removed from the guest domain (retrying the removal as
- necessary).
-
- :param get_device_conf_func: function which takes device as a parameter
- and returns the configuration for device
- :param device: device to detach
- :param live: bool to indicate whether it affects the guest in running
- state
- :param max_retry_count: number of times the returned function will
- retry a detach before failing
- :param inc_sleep_time: incremental time to sleep in seconds between
- detach retries
- :param max_sleep_time: max sleep time in seconds beyond which the sleep
- time will not be incremented using param
- inc_sleep_time. On reaching this threshold,
- max_sleep_time will be used as the sleep time.
- :param alternative_device_name: This is an alternative identifier for
- the device if device is not an ID, used solely for error messages.
- """
- alternative_device_name = alternative_device_name or device
-
- def _try_detach_device(conf, persistent=False, live=False):
- # Raise DeviceNotFound if the device isn't found during detach
- try:
- self.detach_device(conf, persistent=persistent, live=live)
- if get_device_conf_func(device) is None:
- LOG.debug('Successfully detached device %s from guest. '
- 'Persistent? %s. Live? %s',
- device, persistent, live)
- except libvirt.libvirtError as ex:
- with excutils.save_and_reraise_exception(reraise=False) as ctx:
- code = ex.get_error_code()
- msg = ex.get_error_message()
- if code == libvirt.VIR_ERR_DEVICE_MISSING:
- raise exception.DeviceNotFound(
- device=alternative_device_name)
- # NOTE(lyarwood): https://bugzilla.redhat.com/1878659
- # Ignore this known QEMU bug for the time being allowing
- # our retry logic to fire again and hopefully see that
- # the device has been removed asynchronously by QEMU
- # in the meantime when the next call to detach raises
- # VIR_ERR_DEVICE_MISSING.
- if (code == libvirt.VIR_ERR_INTERNAL_ERROR and
- msg and 'already in the process of unplug' in msg
- ):
- LOG.debug('Ignoring QEMU rejecting our request to '
- 'detach as it is caused by a previous '
- 'request still being in progress.')
- return
-
- # Re-raise the original exception if we're not raising
- # DeviceNotFound instead. This will avoid logging of a
- # "Original exception being dropped" traceback.
- ctx.reraise = True
-
- conf = get_device_conf_func(device)
- if conf is None:
- raise exception.DeviceNotFound(device=alternative_device_name)
-
- persistent = self.has_persistent_configuration()
-
- LOG.debug('Attempting initial detach for device %s',
- alternative_device_name)
- try:
- _try_detach_device(conf, persistent, live)
- except exception.DeviceNotFound:
- # NOTE(melwitt): There are effectively two configs for an instance.
- # The persistent config (affects instance upon next boot) and the
- # live config (affects running instance). When we detach a device,
- # we need to detach it from both configs if the instance has a
- # persistent config and a live config. If we tried to detach the
- # device with persistent=True and live=True and it was not found,
- # we should still try to detach from the live config, so continue.
- if persistent and live:
- pass
- else:
- raise
- LOG.debug('Start retrying detach until device %s is gone.',
- alternative_device_name)
-
- @loopingcall.RetryDecorator(max_retry_count=max_retry_count,
- inc_sleep_time=inc_sleep_time,
- max_sleep_time=max_sleep_time,
- exceptions=exception.DeviceDetachFailed)
- def _do_wait_and_retry_detach():
- config = get_device_conf_func(device)
- if config is not None:
- # Device is already detached from persistent config
- # and only the live config needs to be updated.
- _try_detach_device(config, persistent=False, live=live)
-
- reason = _("Unable to detach the device from the live config.")
- raise exception.DeviceDetachFailed(
- device=alternative_device_name, reason=reason)
-
- return _do_wait_and_retry_detach
-
def detach_device(self, conf, persistent=False, live=False):
"""Detaches device to the guest.
diff --git a/releasenotes/notes/libvirt-event-based-device-detach-23ac037004d753b1.yaml b/releasenotes/notes/libvirt-event-based-device-detach-23ac037004d753b1.yaml
new file mode 100644
index 000000000000..12c55fc315d4
--- /dev/null
+++ b/releasenotes/notes/libvirt-event-based-device-detach-23ac037004d753b1.yaml
@@ -0,0 +1,11 @@
+---
+fixes:
+ - |
+    To fix `device detach issues`__ in the libvirt driver, the detach logic
+    has been changed from a sleep-based retry loop to waiting for libvirt
+    domain events. During this change we also introduced two new config
+    options to allow fine-tuning the retry logic. For details, see the
+    description of the
+ new ``[libvirt]device_detach_attempts`` and
+ ``[libvirt]device_detach_timeout`` config options.
+
+ .. __: https://bugs.launchpad.net/nova/+bug/1882521