qemu-img do not use cache=none if no O_DIRECT support

If /var/lib/nova/instances is mounted on a filesystem like tmpfs that
doesn't have support for O_DIRECT, "qemu-img convert" currently crashes
because it's unconditionally using the "-t none" flag.

This patch therefore:
- moves the _supports_direct_io() function out of the libvirt driver,
from nova/virt/libvirt/driver.py to nova/utils.py and makes it public.
- uses that function to decide to use -t none or -t writethrough when
converting images with qemu-img.

Closes-Bug: #1734784

Co-Authored-By: melanie witt <melwittt@gmail.com>

Change-Id: Ifb47de00abf3f83442ca5264fbc24885df924a19
This commit is contained in:
Thomas Goirand 2017-11-28 23:22:56 +01:00 committed by melanie witt
parent 42706270b9
commit e6ce9557f8
7 changed files with 143 additions and 108 deletions

@ -13,6 +13,7 @@
# under the License. # under the License.
import datetime import datetime
import errno
import hashlib import hashlib
import importlib import importlib
import os import os
@ -25,6 +26,7 @@ from keystoneauth1 import exceptions as ks_exc
from keystoneauth1.identity import base as ks_identity from keystoneauth1.identity import base as ks_identity
from keystoneauth1 import session as ks_session from keystoneauth1 import session as ks_session
import mock import mock
from mox3 import mox
import netaddr import netaddr
from oslo_concurrency import processutils from oslo_concurrency import processutils
from oslo_config import cfg from oslo_config import cfg
@ -1438,3 +1440,54 @@ class GetEndpointTestCase(test.NoDBTestCase):
self.adap.get_endpoint_data.assert_not_called() self.adap.get_endpoint_data.assert_not_called()
self.assertEqual(3, self.adap.get_endpoint.call_count) self.assertEqual(3, self.adap.get_endpoint.call_count)
self.assertEqual('public', self.adap.interface) self.assertEqual('public', self.adap.interface)
def _behave_supports_direct_io(self, raise_open=False, raise_write=False,
exc=ValueError()):
open_behavior = os.open(os.path.join('.', '.directio.test'),
os.O_CREAT | os.O_WRONLY | os.O_DIRECT)
if raise_open:
open_behavior.AndRaise(exc)
else:
open_behavior.AndReturn(3)
write_bahavior = os.write(3, mox.IgnoreArg())
if raise_write:
write_bahavior.AndRaise(exc)
# ensure unlink(filepath) will actually remove the file by deleting
# the remaining link to it in close(fd)
os.close(3)
os.unlink(3)
def test_supports_direct_io(self):
# O_DIRECT is not supported on all Python runtimes, so on platforms
# where it's not supported (e.g. Mac), we can still test the code-path
# by stubbing out the value.
if not hasattr(os, 'O_DIRECT'):
# `mock` seems to have trouble stubbing an attr that doesn't
# originally exist, so falling back to stubbing out the attribute
# directly.
os.O_DIRECT = 16384
self.addCleanup(delattr, os, 'O_DIRECT')
einval = OSError()
einval.errno = errno.EINVAL
self.mox.StubOutWithMock(os, 'open')
self.mox.StubOutWithMock(os, 'write')
self.mox.StubOutWithMock(os, 'close')
self.mox.StubOutWithMock(os, 'unlink')
_supports_direct_io = utils.supports_direct_io
self._behave_supports_direct_io()
self._behave_supports_direct_io(raise_write=True)
self._behave_supports_direct_io(raise_open=True)
self._behave_supports_direct_io(raise_write=True, exc=einval)
self._behave_supports_direct_io(raise_open=True, exc=einval)
self.mox.ReplayAll()
self.assertTrue(_supports_direct_io('.'))
self.assertRaises(ValueError, _supports_direct_io, '.')
self.assertRaises(ValueError, _supports_direct_io, '.')
self.assertFalse(_supports_direct_io('.'))
self.assertFalse(_supports_direct_io('.'))
self.mox.VerifyAll()

@ -6835,57 +6835,6 @@ class LibvirtConnTestCase(test.NoDBTestCase,
self.assertEqual(len(interfaces), 2) self.assertEqual(len(interfaces), 2)
self.assertEqual(interfaces[0].get('type'), 'bridge') self.assertEqual(interfaces[0].get('type'), 'bridge')
def _behave_supports_direct_io(self, raise_open=False, raise_write=False,
exc=ValueError()):
open_behavior = os.open(os.path.join('.', '.directio.test'),
os.O_CREAT | os.O_WRONLY | os.O_DIRECT)
if raise_open:
open_behavior.AndRaise(exc)
else:
open_behavior.AndReturn(3)
write_bahavior = os.write(3, mox.IgnoreArg())
if raise_write:
write_bahavior.AndRaise(exc)
# ensure unlink(filepath) will actually remove the file by deleting
# the remaining link to it in close(fd)
os.close(3)
os.unlink(3)
def test_supports_direct_io(self):
# O_DIRECT is not supported on all Python runtimes, so on platforms
# where it's not supported (e.g. Mac), we can still test the code-path
# by stubbing out the value.
if not hasattr(os, 'O_DIRECT'):
# `mock` seems to have trouble stubbing an attr that doesn't
# originally exist, so falling back to stubbing out the attribute
# directly.
os.O_DIRECT = 16384
self.addCleanup(delattr, os, 'O_DIRECT')
einval = OSError()
einval.errno = errno.EINVAL
self.mox.StubOutWithMock(os, 'open')
self.mox.StubOutWithMock(os, 'write')
self.mox.StubOutWithMock(os, 'close')
self.mox.StubOutWithMock(os, 'unlink')
_supports_direct_io = libvirt_driver.LibvirtDriver._supports_direct_io
self._behave_supports_direct_io()
self._behave_supports_direct_io(raise_write=True)
self._behave_supports_direct_io(raise_open=True)
self._behave_supports_direct_io(raise_write=True, exc=einval)
self._behave_supports_direct_io(raise_open=True, exc=einval)
self.mox.ReplayAll()
self.assertTrue(_supports_direct_io('.'))
self.assertRaises(ValueError, _supports_direct_io, '.')
self.assertRaises(ValueError, _supports_direct_io, '.')
self.assertFalse(_supports_direct_io('.'))
self.assertFalse(_supports_direct_io('.'))
self.mox.VerifyAll()
def _check_xml_and_container(self, instance): def _check_xml_and_container(self, instance):
instance_ref = objects.Instance(**instance) instance_ref = objects.Instance(**instance)
image_meta = objects.ImageMeta.from_dict(self.test_image_meta) image_meta = objects.ImageMeta.from_dict(self.test_image_meta)
@ -6982,12 +6931,11 @@ class LibvirtConnTestCase(test.NoDBTestCase,
self.stub_out('os.open', os_open_stub) self.stub_out('os.open', os_open_stub)
@staticmethod
def connection_supports_direct_io_stub(dirpath): def connection_supports_direct_io_stub(dirpath):
return directio_supported return directio_supported
self.stubs.Set(libvirt_driver.LibvirtDriver, self.stub_out('nova.utils.supports_direct_io',
'_supports_direct_io', connection_supports_direct_io_stub) connection_supports_direct_io_stub)
instance_ref = objects.Instance(**self.test_instance) instance_ref = objects.Instance(**self.test_instance)
image_meta = objects.ImageMeta.from_dict(self.test_image_meta) image_meta = objects.ImageMeta.from_dict(self.test_image_meta)
@ -16526,7 +16474,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
# get_guest_xml(). # get_guest_xml().
@mock.patch.object(libvirt_driver.LibvirtDriver, '_set_host_enabled') @mock.patch.object(libvirt_driver.LibvirtDriver, '_set_host_enabled')
@mock.patch.object(libvirt_driver.LibvirtDriver, '_build_device_metadata') @mock.patch.object(libvirt_driver.LibvirtDriver, '_build_device_metadata')
@mock.patch.object(libvirt_driver.LibvirtDriver, '_supports_direct_io') @mock.patch('nova.utils.supports_direct_io')
@mock.patch('nova.api.metadata.base.InstanceMetadata') @mock.patch('nova.api.metadata.base.InstanceMetadata')
def _test_finish_migration(self, mock_instance_metadata, def _test_finish_migration(self, mock_instance_metadata,
mock_supports_direct_io, mock_supports_direct_io,
@ -17485,7 +17433,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
# get_guest_xml(). # get_guest_xml().
@mock.patch.object(libvirt_driver.LibvirtDriver, '_set_host_enabled') @mock.patch.object(libvirt_driver.LibvirtDriver, '_set_host_enabled')
@mock.patch.object(libvirt_driver.LibvirtDriver, '_build_device_metadata') @mock.patch.object(libvirt_driver.LibvirtDriver, '_build_device_metadata')
@mock.patch.object(libvirt_driver.LibvirtDriver, '_supports_direct_io') @mock.patch('nova.utils.supports_direct_io')
@mock.patch('nova.api.metadata.base.InstanceMetadata') @mock.patch('nova.api.metadata.base.InstanceMetadata')
def _test_rescue(self, instance, def _test_rescue(self, instance,
mock_instance_metadata, mock_supports_direct_io, mock_instance_metadata, mock_supports_direct_io,

@ -628,7 +628,8 @@ disk size: 4.4M
mock_images.assert_called_once_with( mock_images.assert_called_once_with(
_context, image_id, target) _context, image_id, target)
def test_fetch_raw_image(self): @mock.patch('nova.utils.supports_direct_io', return_value=True)
def test_fetch_raw_image(self, mock_direct_io):
def fake_execute(*cmd, **kwargs): def fake_execute(*cmd, **kwargs):
self.executes.append(cmd) self.executes.append(cmd)

@ -45,9 +45,10 @@ class QemuTestCase(test.NoDBTestCase):
self.assertTrue(image_info) self.assertTrue(image_info)
self.assertTrue(str(image_info)) self.assertTrue(str(image_info))
@mock.patch('nova.utils.supports_direct_io', return_value=True)
@mock.patch.object(utils, 'execute', @mock.patch.object(utils, 'execute',
side_effect=processutils.ProcessExecutionError) side_effect=processutils.ProcessExecutionError)
def test_convert_image_with_errors(self, mocked_execute): def test_convert_image_with_errors(self, mocked_execute, mock_direct_io):
self.assertRaises(exception.ImageUnacceptable, self.assertRaises(exception.ImageUnacceptable,
images.convert_image, images.convert_image,
'/path/that/does/not/exist', '/path/that/does/not/exist',
@ -77,3 +78,23 @@ class QemuTestCase(test.NoDBTestCase):
'Image href123 is unacceptable.*', 'Image href123 is unacceptable.*',
images.fetch_to_raw, images.fetch_to_raw,
None, 'href123', '/no/path') None, 'href123', '/no/path')
@mock.patch('nova.utils.supports_direct_io', return_value=True)
@mock.patch('nova.utils.execute')
def test_convert_image_with_direct_io_support(self, mock_execute,
mock_direct_io):
images._convert_image('source', 'dest', 'in_format', 'out_format',
run_as_root=False)
expected = ('qemu-img', 'convert', '-t', 'none', '-O', 'out_format',
'-f', 'in_format', 'source', 'dest')
self.assertTupleEqual(expected, mock_execute.call_args[0])
@mock.patch('nova.utils.supports_direct_io', return_value=False)
@mock.patch('nova.utils.execute')
def test_convert_image_without_direct_io_support(self, mock_execute,
mock_direct_io):
images._convert_image('source', 'dest', 'in_format', 'out_format',
run_as_root=False)
expected = ('qemu-img', 'convert', '-t', 'writethrough',
'-O', 'out_format', '-f', 'in_format', 'source', 'dest')
self.assertTupleEqual(expected, mock_execute.call_args[0])

@ -20,9 +20,11 @@
import contextlib import contextlib
import copy import copy
import datetime import datetime
import errno
import functools import functools
import hashlib import hashlib
import inspect import inspect
import mmap
import os import os
import pyclbr import pyclbr
import random import random
@ -1379,3 +1381,50 @@ def get_endpoint(ksa_adapter):
raise ks_exc.EndpointNotFound( raise ks_exc.EndpointNotFound(
"Could not find requested endpoint for any of the following " "Could not find requested endpoint for any of the following "
"interfaces: %s" % interfaces) "interfaces: %s" % interfaces)
def supports_direct_io(dirpath):
if not hasattr(os, 'O_DIRECT'):
LOG.debug("This python runtime does not support direct I/O")
return False
testfile = os.path.join(dirpath, ".directio.test")
hasDirectIO = True
fd = None
try:
fd = os.open(testfile, os.O_CREAT | os.O_WRONLY | os.O_DIRECT)
# Check is the write allowed with 512 byte alignment
align_size = 512
m = mmap.mmap(-1, align_size)
m.write(b"x" * align_size)
os.write(fd, m)
LOG.debug("Path '%(path)s' supports direct I/O",
{'path': dirpath})
except OSError as e:
if e.errno == errno.EINVAL:
LOG.debug("Path '%(path)s' does not support direct I/O: "
"'%(ex)s'", {'path': dirpath, 'ex': e})
hasDirectIO = False
else:
with excutils.save_and_reraise_exception():
LOG.error("Error on '%(path)s' while checking "
"direct I/O: '%(ex)s'",
{'path': dirpath, 'ex': e})
except Exception as e:
with excutils.save_and_reraise_exception():
LOG.error("Error on '%(path)s' while checking direct I/O: "
"'%(ex)s'", {'path': dirpath, 'ex': e})
finally:
# ensure unlink(filepath) will actually remove the file by deleting
# the remaining link to it in close(fd)
if fd is not None:
os.close(fd)
try:
os.unlink(testfile)
except Exception:
pass
return hasDirectIO

@ -118,7 +118,18 @@ def _convert_image(source, dest, in_format, out_format, run_as_root):
# on persistent storage when the command exits. Without (2), a host crash # on persistent storage when the command exits. Without (2), a host crash
# may leave a corrupt image in the image cache, which Nova cannot recover # may leave a corrupt image in the image cache, which Nova cannot recover
# automatically. # automatically.
cmd = ('qemu-img', 'convert', '-t', 'none', '-O', out_format) # NOTE(zigo): we cannot use -t none if the instances dir is mounted on a
# filesystem that doesn't have support for O_DIRECT, which is the case
# for example with tmpfs. This simply crashes "openstack server create"
# in environments like live distributions. In such case, the best choice
# is writethrough, which is power-failure safe, but still faster than
# writeback.
if utils.supports_direct_io(CONF.instances_path):
cache_mode = 'none'
else:
cache_mode = 'writethrough'
cmd = ('qemu-img', 'convert', '-t', cache_mode, '-O', out_format)
if in_format is not None: if in_format is not None:
cmd = cmd + ('-f', in_format) cmd = cmd + ('-f', in_format)
cmd = cmd + (source, dest) cmd = cmd + (source, dest)

@ -32,7 +32,6 @@ import errno
import functools import functools
import glob import glob
import itertools import itertools
import mmap
import operator import operator
import os import os
import pwd import pwd
@ -410,7 +409,7 @@ class LibvirtDriver(driver.ComputeDriver):
# is safe for migration provided the filesystem is cache coherent # is safe for migration provided the filesystem is cache coherent
# (cluster filesystems typically are, but things like NFS are not). # (cluster filesystems typically are, but things like NFS are not).
self._disk_cachemode = "none" self._disk_cachemode = "none"
if not self._supports_direct_io(CONF.instances_path): if not utils.supports_direct_io(CONF.instances_path):
self._disk_cachemode = "writethrough" self._disk_cachemode = "writethrough"
return self._disk_cachemode return self._disk_cachemode
@ -2961,53 +2960,6 @@ class LibvirtDriver(driver.ComputeDriver):
return ctype.ConsoleSerial(host=hostname, port=port) return ctype.ConsoleSerial(host=hostname, port=port)
raise exception.ConsoleTypeUnavailable(console_type='serial') raise exception.ConsoleTypeUnavailable(console_type='serial')
@staticmethod
def _supports_direct_io(dirpath):
if not hasattr(os, 'O_DIRECT'):
LOG.debug("This python runtime does not support direct I/O")
return False
testfile = os.path.join(dirpath, ".directio.test")
hasDirectIO = True
fd = None
try:
fd = os.open(testfile, os.O_CREAT | os.O_WRONLY | os.O_DIRECT)
# Check is the write allowed with 512 byte alignment
align_size = 512
m = mmap.mmap(-1, align_size)
m.write(b"x" * align_size)
os.write(fd, m)
LOG.debug("Path '%(path)s' supports direct I/O",
{'path': dirpath})
except OSError as e:
if e.errno == errno.EINVAL:
LOG.debug("Path '%(path)s' does not support direct I/O: "
"'%(ex)s'", {'path': dirpath, 'ex': e})
hasDirectIO = False
else:
with excutils.save_and_reraise_exception():
LOG.error("Error on '%(path)s' while checking "
"direct I/O: '%(ex)s'",
{'path': dirpath, 'ex': e})
except Exception as e:
with excutils.save_and_reraise_exception():
LOG.error("Error on '%(path)s' while checking direct I/O: "
"'%(ex)s'", {'path': dirpath, 'ex': e})
finally:
# ensure unlink(filepath) will actually remove the file by deleting
# the remaining link to it in close(fd)
if fd is not None:
os.close(fd)
try:
os.unlink(testfile)
except Exception:
pass
return hasDirectIO
@staticmethod @staticmethod
def _create_ephemeral(target, ephemeral_size, def _create_ephemeral(target, ephemeral_size,
fs_label, os_type, is_block_dev=False, fs_label, os_type, is_block_dev=False,