From 2b1544390cd0afd4d2b0c6ea4636a49a73b50dcf Mon Sep 17 00:00:00 2001 From: Neil Jerram Date: Sat, 26 Jan 2019 21:22:33 +0000 Subject: [PATCH] Allow utime call to fail on qcow2 image base file This utime call sometimes fails, with EACCES (Permission Denied), when the base file is on an NFS client filesystem. I don't understand why, but wonder if it's a similar problem as the one that motivated using touch instead of utime in ec9d5e375e2. In any case, IIUC, timing isn't the primary thing that the image cache manager uses to determine when the base file is in use. The primary mechanism for that is whether there is a matching disk file for a current instance. The timestamp on the base file is only used when deciding whether to delete a base file that is _not_ in use; so it is not a big deal if that deletion happens slightly earlier, for an unused base file, because of one of these preceding utime calls having failed. Conflicts: nova/tests/unit/virt/libvirt/test_imagebackend.py NOTE(mriedem): Conflict is due to not having change I154b1ef14dbace803841cb3eed3ff02dd0020588 in Queens. Closes-Bug: #1809123 Co-Authored-By: Matthew Booth Change-Id: Idc131ff426f1707150867030fa5a69b77a7fc832 (cherry picked from commit 362bba9011f8445285c3f72a0b00630163bfbc2f) (cherry picked from commit 7d0dd625d35a08d159b50154809a53e8444ae25c) --- .../unit/virt/libvirt/test_imagebackend.py | 42 +++++++++++++++++++ nova/virt/libvirt/imagebackend.py | 32 ++++++++++++-- 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/nova/tests/unit/virt/libvirt/test_imagebackend.py b/nova/tests/unit/virt/libvirt/test_imagebackend.py index 8b7695e2714e..6999a0345fff 100644 --- a/nova/tests/unit/virt/libvirt/test_imagebackend.py +++ b/nova/tests/unit/virt/libvirt/test_imagebackend.py @@ -14,6 +14,7 @@ # under the License. import base64 +import errno import inspect import os import shutil @@ -1840,3 +1841,44 @@ class BackendTestCase(test.NoDBTestCase): def test_image_default(self): self._test_image('default', imagebackend.Flat, imagebackend.Qcow2) + + +class UtimeWorkaroundTestCase(test.NoDBTestCase): + ERROR_STUB = "sentinel.path: [Errno 13] Permission Denied" + + def setUp(self): + super(UtimeWorkaroundTestCase, self).setUp() + self.mock_utime = self.useFixture( + fixtures.MockPatch('nova.privsep.path.utime')).mock + + def test_update_utime_no_error(self): + # If utime doesn't raise an error we shouldn't raise or log anything + imagebackend._update_utime_ignore_eacces(mock.sentinel.path) + self.mock_utime.assert_called_once_with(mock.sentinel.path) + self.assertNotIn(self.ERROR_STUB, self.stdlog.logger.output) + + def test_update_utime_eacces(self): + # If utime raises EACCES we should log the error, but ignore it + e = OSError() + e.errno = errno.EACCES + e.strerror = "Permission Denied" + self.mock_utime.side_effect = e + + imagebackend._update_utime_ignore_eacces(mock.sentinel.path) + self.mock_utime.assert_called_once_with(mock.sentinel.path) + self.assertIn(self.ERROR_STUB, self.stdlog.logger.output) + + def test_update_utime_eio(self): + # If utime raises any other error we should raise it + e = OSError() + e.errno = errno.EIO + e.strerror = "IO Error" + self.mock_utime.side_effect = e + + ex = self.assertRaises( + OSError, imagebackend._update_utime_ignore_eacces, + mock.sentinel.path) + self.assertIs(ex, e) + + self.mock_utime.assert_called_once_with(mock.sentinel.path) + self.assertNotIn(self.ERROR_STUB, self.stdlog.logger.output) diff --git a/nova/virt/libvirt/imagebackend.py b/nova/virt/libvirt/imagebackend.py index ceb40589ebb5..79edbf1842d8 100644 --- a/nova/virt/libvirt/imagebackend.py +++ b/nova/virt/libvirt/imagebackend.py @@ -16,6 +16,7 @@ import abc import base64 import contextlib +import errno import functools import os import shutil @@ -51,6 +52,31 @@ LOG = logging.getLogger(__name__) IMAGE_API = image.API() +# NOTE(neiljerram): Don't worry if this fails. This sometimes happens, with +# EACCES (Permission Denied), when the base file is on an NFS client +# filesystem. I don't understand why, but wonder if it's a similar problem as +# the one that motivated using touch instead of utime in ec9d5e375e2. In any +# case, IIUC, timing isn't the primary thing that the image cache manager uses +# to determine when the base file is in use. The primary mechanism for that is +# whether there is a matching disk file for a current instance. The timestamp +# on the base file is only used when deciding whether to delete a base file +# that is _not_ in use; so it is not a big deal if that deletion happens +# slightly earlier, for an unused base file, because of one of these preceding +# utime calls having failed. +# NOTE(mdbooth): Only use this method for updating the utime of an image cache +# entry during disk creation. +# TODO(mdbooth): Remove or rework this when we understand the problem. +def _update_utime_ignore_eacces(path): + try: + nova.privsep.path.utime(path) + except OSError as e: + with excutils.save_and_reraise_exception(logger=LOG) as ctxt: + if e.errno == errno.EACCES: + LOG.warning("Ignoring failure to update utime of %(path)s: " + "%(error)s", {'path': path, 'error': e}) + ctxt.reraise = False + + @six.add_metaclass(abc.ABCMeta) class Image(object): @@ -542,7 +568,7 @@ class Flat(Image): # NOTE(mikal): Update the mtime of the base file so the image # cache manager knows it is in use. - nova.privsep.path.utime(base) + _update_utime_ignore_eacces(base) self.verify_base_size(base, size) if not os.path.exists(self.path): with fileutils.remove_path_on_error(self.path): @@ -598,7 +624,7 @@ class Qcow2(Image): # NOTE(ankit): Update the mtime of the base file so the image # cache manager knows it is in use. - nova.privsep.path.utime(base) + _update_utime_ignore_eacces(base) self.verify_base_size(base, size) legacy_backing_size = None @@ -1093,7 +1119,7 @@ class Ploop(Image): prepare_template(target=base, *args, **kwargs) else: # Disk already exists in cache, just update time - nova.privsep.path.utime(base) + _update_utime_ignore_eacces(base) self.verify_base_size(base, size) if os.path.exists(self.path):