diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index cb7a8f76b2df..7f8ed406ad14 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -14882,7 +14882,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, instance) def test_private_destroy_ebusy_timeout(self): - # Tests that _destroy will retry 3 times to destroy the guest when an + # Tests that _destroy will retry 6 times to destroy the guest when an # EBUSY is raised, but eventually times out and raises the libvirtError ex = fakelibvirt.make_libvirtError( fakelibvirt.libvirtError, @@ -14902,7 +14902,7 @@ class LibvirtConnTestCase(test.NoDBTestCase, self.assertRaises(fakelibvirt.libvirtError, drvr._destroy, instance) - self.assertEqual(3, mock_guest.poweroff.call_count) + self.assertEqual(6, mock_guest.poweroff.call_count) def test_private_destroy_ebusy_multiple_attempt_ok(self): # Tests that the _destroy attempt loop is broken when EBUSY is no @@ -14928,6 +14928,37 @@ class LibvirtConnTestCase(test.NoDBTestCase, self.assertEqual(2, mock_guest.poweroff.call_count) + @mock.patch.object(libvirt_driver.LOG, 'warning') + @mock.patch.object(fakelibvirt.Connection, 'getLibVersion', + return_value=versionutils.convert_version_to_int( + libvirt_driver.MIN_LIBVIRT_BETTER_SIGKILL_HANDLING)) + def test_min_libvirt_better_sigkill_handling_warning(self, + mock_warning, + mock_get_libversion): + ex = fakelibvirt.make_libvirtError( + fakelibvirt.libvirtError, + ("Failed to terminate process 26425 with SIGKILL: " + "Device or resource busy"), + error_code=fakelibvirt.VIR_ERR_SYSTEM_ERROR, + int1=errno.EBUSY) + + mock_guest = mock.Mock(libvirt_guest.Guest, id=1) + mock_guest.poweroff = mock.Mock(side_effect=ex) + + instance = objects.Instance(**self.test_instance) + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + + with mock.patch.object(drvr._host, 'get_guest', + return_value=mock_guest): + raised = self.assertRaises(fakelibvirt.libvirtError, + drvr._destroy, + instance) + self.assertEqual(fakelibvirt.VIR_ERR_SYSTEM_ERROR, + raised.get_error_code()) + + mock_warning.assert_called_once() + mock_guest.poweroff.assert_called_once() + @mock.patch.object(fakelibvirt.libvirtError, 'get_error_code') @mock.patch.object(host.Host, '_get_domain', side_effect=exception.InstanceNotFound( diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 89622734ad0e..b8b7514d2de7 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -282,6 +282,11 @@ MIN_QEMU_FILE_BACKED_DISCARD_VERSION = (2, 10, 0) MIN_LIBVIRT_NATIVE_TLS_VERSION = (4, 4, 0) MIN_QEMU_NATIVE_TLS_VERSION = (2, 11, 0) +# If the host has this libvirt version, then we skip the retry loop of +# instance destroy() call, as libvirt itself increased the wait time +# before the SIGKILL signal takes effect. +MIN_LIBVIRT_BETTER_SIGKILL_HANDLING = (4, 7, 0) + VGPU_RESOURCE_SEMAPHORE = "vgpu_resources" @@ -1005,18 +1010,36 @@ class LibvirtDriver(driver.ComputeDriver): # steal time from the cloud host. ie 15 wallclock # seconds may have passed, but the VM might have only # have a few seconds of scheduled run time. - LOG.warning('Error from libvirt during destroy. ' - 'Code=%(errcode)s Error=%(e)s; ' - 'attempt %(attempt)d of 3', - {'errcode': errcode, 'e': e, - 'attempt': attempt}, - instance=instance) + # + # TODO(kchamart): Once MIN_LIBVIRT_VERSION + # reaches v4.7.0, (a) rewrite the above note, + # and (b) remove the following code that retries + # _destroy() API call (which gives SIGKILL 30 + # seconds to take effect) -- because from v4.7.0 + # onwards, libvirt _automatically_ increases the + # timeout to 30 seconds. This was added in the + # following libvirt commits: + # + # - 9a4e4b942 (process: wait longer 5->30s on + # hard shutdown) + # + # - be2ca0444 (process: wait longer on kill + # per assigned Hostdev) with excutils.save_and_reraise_exception() as ctxt: - # Try up to 3 times before giving up. - if attempt < 3: - ctxt.reraise = False - self._destroy(instance, attempt + 1) - return + if not self._host.has_min_version( + MIN_LIBVIRT_BETTER_SIGKILL_HANDLING): + LOG.warning('Error from libvirt during ' + 'destroy. Code=%(errcode)s ' + 'Error=%(e)s; attempt ' + '%(attempt)d of 6 ', + {'errcode': errcode, 'e': e, + 'attempt': attempt}, + instance=instance) + # Try up to 6 times before giving up. + if attempt < 6: + ctxt.reraise = False + self._destroy(instance, attempt + 1) + return if not is_okay: with excutils.save_and_reraise_exception():