From d39934ad6afb7e2729bb45235f363ada86012d15 Mon Sep 17 00:00:00 2001 From: melanie witt Date: Fri, 28 Jul 2017 17:06:53 +0000 Subject: [PATCH] Detach device from live domain even if not found on persistent In a past attempt to fix a bug [1], we started raising DeviceNotFound if a device wasn't found on the persistent domain. This was to address a scenario where the guest ignored the detach from the live domain because it was busy and we wanted to avoid failing a later detach request to the user (compute handles DeviceNotFound). Unfortunately, in the above case, a later detach request won't fail to the user but it also won't detach from the live domain. It sees the device already detached from the persistent domain and doesn't attempt to detach from the live domain. This is a serious problem because it's possible for a volume to be attached to two live domains and data corruption can occur. This adds an attempt to detach from the live domain even if we had already detached from the persistent domain in the past. Closes-Bug: #1707238 [1] https://review.openstack.org/386257 Change-Id: I8cd056fa17184a98c31547add0e9fb2d363d0908 --- nova/tests/unit/virt/libvirt/test_guest.py | 50 +++++++++++++++++++--- nova/virt/libvirt/guest.py | 15 ++++++- 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/nova/tests/unit/virt/libvirt/test_guest.py b/nova/tests/unit/virt/libvirt/test_guest.py index a65e3c6fd165..09dea8859889 100644 --- a/nova/tests/unit/virt/libvirt/test_guest.py +++ b/nova/tests/unit/virt/libvirt/test_guest.py @@ -327,26 +327,62 @@ class GuestTestCase(test.NoDBTestCase): # Some time later, we can do the wait/retry to ensure detach self.assertRaises(exception.DeviceNotFound, retry_detach) - @mock.patch.object(libvirt_guest.Guest, "detach_device") - def test_detach_device_with_retry_invalid_argument(self, mock_detach): + def test_detach_device_with_retry_invalid_argument(self): # This simulates a persistent domain detach failing because # the device is not found conf = mock.Mock(spec=vconfig.LibvirtConfigGuestDevice) conf.to_xml.return_value = "" self.domain.isPersistent.return_value = True - get_config = mock.Mock(return_value=conf) + get_config = mock.Mock() + # Simulate the persistent domain attach attempt followed by the live + # domain attach attempt and success + get_config.side_effect = [conf, conf, None] fake_device = "vdb" fake_exc = fakelibvirt.make_libvirtError( fakelibvirt.libvirtError, "", error_message="invalid argument: no target device vdb", error_code=fakelibvirt.VIR_ERR_INVALID_ARG, error_domain=fakelibvirt.VIR_FROM_DOMAIN) - mock_detach.side_effect = fake_exc + # Detach from persistent raises not found, detach from live succeeds + self.domain.detachDeviceFlags.side_effect = [fake_exc, None] + retry_detach = self.guest.detach_device_with_retry(get_config, + fake_device, live=True, inc_sleep_time=.01, max_retry_count=3) + # We should have tried to detach from the persistent domain + self.domain.detachDeviceFlags.assert_called_once_with( + "", flags=(fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG | + fakelibvirt.VIR_DOMAIN_AFFECT_LIVE)) + # During the retry detach, should detach from the live domain + self.domain.detachDeviceFlags.reset_mock() + retry_detach() + # We should have tried to detach from the live domain + self.domain.detachDeviceFlags.assert_called_once_with( + "", flags=fakelibvirt.VIR_DOMAIN_AFFECT_LIVE) + + def test_detach_device_with_retry_invalid_argument_no_live(self): + # This simulates a persistent domain detach failing because + # the device is not found + conf = mock.Mock(spec=vconfig.LibvirtConfigGuestDevice) + conf.to_xml.return_value = "" + self.domain.isPersistent.return_value = True + + get_config = mock.Mock() + # Simulate the persistent domain attach attempt + get_config.return_value = conf + fake_device = "vdb" + fake_exc = fakelibvirt.make_libvirtError( + fakelibvirt.libvirtError, "", + error_message="invalid argument: no target device vdb", + error_code=fakelibvirt.VIR_ERR_INVALID_ARG, + error_domain=fakelibvirt.VIR_FROM_DOMAIN) + # Detach from persistent raises not found + self.domain.detachDeviceFlags.side_effect = fake_exc self.assertRaises(exception.DeviceNotFound, - self.guest.detach_device_with_retry, - get_config, fake_device, live=True, inc_sleep_time=.01, - max_retry_count=3) + self.guest.detach_device_with_retry, get_config, + fake_device, live=False, inc_sleep_time=.01, max_retry_count=3) + # We should have tried to detach from the persistent domain + self.domain.detachDeviceFlags.assert_called_once_with( + "", flags=fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG) def test_get_xml_desc(self): self.guest.get_xml_desc() diff --git a/nova/virt/libvirt/guest.py b/nova/virt/libvirt/guest.py index bafbee2e6065..e2e35e2e385f 100644 --- a/nova/virt/libvirt/guest.py +++ b/nova/virt/libvirt/guest.py @@ -429,7 +429,20 @@ class Guest(object): LOG.debug('Attempting initial detach for device %s', alternative_device_name) - _try_detach_device(conf, persistent, live) + try: + _try_detach_device(conf, persistent, live) + except exception.DeviceNotFound: + # NOTE(melwitt): There are effectively two configs for an instance. + # The persistent config (affects instance upon next boot) and the + # live config (affects running instance). When we detach a device, + # we need to detach it from both configs if the instance has a + # persistent config and a live config. If we tried to detach the + # device with persistent=True and live=True and it was not found, + # we should still try to detach from the live config, so continue. + if persistent and live: + pass + else: + raise LOG.debug('Start retrying detach until device %s is gone.', alternative_device_name)