Merge "Add a workaround config toggle to refuse ceph image upload"

This commit is contained in:
Zuul 2020-01-10 08:41:43 +00:00 committed by Gerrit Code Review
commit b0cae0750c
4 changed files with 104 additions and 0 deletions

View File

@ -246,6 +246,30 @@ candidates if necessary. This has a slight performance impact and is not
necessary on new or upgraded deployments where the new configuration has been
set on all hosts. By setting this option, the second lookup is disabled and the
scheduler will only request ``PCPU``-based allocations.
"""),
# Workaround toggle tied to bug 1858877 (see the help text below): off by
# default; when enabled, nova refuses to boot an instance rather than
# download the image from glance and upload it to ceph itself.
cfg.BoolOpt(
'never_download_image_if_on_rbd',
default=False,
help="""
When booting from an image on a ceph-backed compute node, if the image does not
already reside on the ceph cluster (as would be the case if glance is
also using the same cluster), nova will download the image from glance and
upload it to ceph itself. If using multiple ceph clusters, this may cause nova
to unintentionally duplicate the image in a non-COW-able way in the local
ceph deployment, wasting space.
For more information, refer to the bug report:
https://bugs.launchpad.net/nova/+bug/1858877
Enabling this option will cause nova to *refuse* to boot an instance if it
would require downloading the image from glance and uploading it to ceph
itself.
Related options:
* ``compute_driver`` (libvirt)
* ``[libvirt]/images_type`` (rbd)
"""),
]

View File

@ -21454,6 +21454,52 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
None)
self.assertFalse(mock_inject.called)
@mock.patch('nova.virt.libvirt.utils.fetch_image')
@mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver')
@mock.patch.object(imagebackend, 'IMAGE_API')
def test_create_fetch_image_ceph_workaround(self, mock_image, mock_rbd,
                                            mock_fetch):
    """Exercise [workarounds]/never_download_image_if_on_rbd.

    With the workaround disabled, a failed rbd clone must fall back to
    fetching the image. With the workaround enabled, creating the root
    disk must raise ImageUnacceptable and must not fetch the image.
    """
    # Make sure that rbd clone will fail as un-clone-able. Like the
    # stubs below, this has to be configured on the instance mock
    # (mock_rbd.return_value): an attribute set on the patched class
    # itself is a different child mock and is not seen on the object
    # returned by calling the class.
    mock_rbd.return_value.is_cloneable.return_value = False
    # Make sure the rbd code thinks the image does not already exist
    mock_rbd.return_value.exists.return_value = False
    # Make sure the rbd code says the image is small
    mock_rbd.return_value.size.return_value = 128 * units.Mi
    # Make sure IMAGE_API.get() returns a raw image
    mock_image.get.return_value = {'locations': [], 'disk_format': 'raw'}

    instance = self._create_instance()
    disk_images = {'image_id': 'foo'}
    self.flags(images_type='rbd', group='libvirt')
    drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)

    def do_create():
        # Reset the fetch mock and run our driver method so we can
        # check for called-ness after each attempt
        mock_fetch.reset_mock()
        drvr._create_and_inject_local_root(self.context,
                                           instance,
                                           False,
                                           '',
                                           disk_images,
                                           get_injection_info(),
                                           None)

    # Do an image create with rbd
    do_create()
    # Make sure it tried fetch, which implies that it tried and
    # failed to clone.
    mock_fetch.assert_called()

    # Enable the workaround
    self.flags(never_download_image_if_on_rbd=True,
               group='workarounds')
    # Ensure that we raise the original ImageUnacceptable from the
    # failed clone...
    self.assertRaises(exception.ImageUnacceptable, do_create)
    # ...and ensure that we did _not_ try to fetch
    mock_fetch.assert_not_called()
@mock.patch('nova.virt.netutils.get_injected_network_template')
@mock.patch('nova.virt.disk.api.inject_data')
@mock.patch.object(libvirt_driver.LibvirtDriver, "_conn")

View File

@ -3867,9 +3867,24 @@ class LibvirtDriver(driver.ComputeDriver):
backend.create_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
if backend.SUPPORTS_CLONE:
def clone_fallback_to_fetch(*args, **kwargs):
    """Clone the image in the backend, fetching it only as a fallback.

    The clone is attempted first. If it raises ImageUnacceptable, the
    image is fetched with the arguments passed to this function, unless
    images_type is 'rbd' and the never_download_image_if_on_rbd
    workaround is enabled; in that case a warning is logged and the
    original ImageUnacceptable is re-raised.
    """
    uses_rbd = CONF.libvirt.images_type == 'rbd'
    fetch_forbidden = (
        uses_rbd and CONF.workarounds.never_download_image_if_on_rbd)
    image_id = disk_images['image_id']
    refusal_msg = (
        'Image %s is not on my ceph and '
        '[workarounds]/never_download_image_if_on_rbd=True;'
        ' refusing to fetch and upload.')
    try:
        backend.clone(context, image_id)
    except exception.ImageUnacceptable:
        if fetch_forbidden:
            # Re-raise the exception from the failed ceph clone. The
            # compute manager expects ImageUnacceptable as a possible
            # result of spawn(), from which this is called.
            with excutils.save_and_reraise_exception():
                LOG.warning(refusal_msg, image_id)
        libvirt_utils.fetch_image(*args, **kwargs)
fetch_func = clone_fallback_to_fetch
else:

View File

@ -0,0 +1,19 @@
---
other:
- |
Nova now has a config option called
``[workarounds]/never_download_image_if_on_rbd`` which helps to
avoid pathological storage behavior with multiple ceph clusters.
Currently, Nova does *not* support multiple ceph clusters
properly, but Glance can be configured with them. If an instance
is booted from an image residing in a ceph cluster other than the
one Nova knows about, it will silently download it from Glance and
re-upload the image to the local ceph privately for that
instance. Unlike the behavior you expect when configuring Nova and
Glance for ceph, Nova will continue to do this over and over for
the same image when subsequent instances are booted, consuming a
large amount of storage unexpectedly. The new workaround option
will cause Nova to refuse to do this download/upload behavior and
instead fail the instance boot. It is simply a stop-gap effort to
prevent unsupported deployments with multiple ceph clusters from
silently consuming large amounts of disk space.