Reserve DISK_GB resource for the image cache

If the nova compute image cache is on the same disk as the instance
directory then the images in the cache will consume the same disk
resource as the nova instances which can lead to disk overallocation.

This patch adds a new config option
[workarounds]/reserve_disk_resource_for_image_cache . If it is set the
libvirt driver will reserve DISK_GB resources in placement for the
actual size of the image cache.

This is a low complexity solution with known limitations:
* Reservation is updated long after the image is downloaded
* This code allows the reservation to push the provider into negative
available resource if the reservation + allocations exceed the total
inventory.

Change-Id: If874f018ea996587e178219569c2903c2ee923cf
Closes-Bug: #1878024
This commit is contained in:
Balazs Gibizer 2020-05-15 15:00:20 +02:00
parent ef3b570732
commit 89fe504abf
7 changed files with 165 additions and 1 deletions

View File

@ -326,6 +326,25 @@ Related options:
* ``compute_driver`` (libvirt) * ``compute_driver`` (libvirt)
* ``disable_qemu_native_luksv1`` (workarounds) * ``disable_qemu_native_luksv1`` (workarounds)
"""),
cfg.BoolOpt('reserve_disk_resource_for_image_cache',
default=False,
help="""
If it is set to True then the libvirt driver will reserve DISK_GB resource for
the images stored in the image cache. If the
:oslo.config:option:`DEFAULT.instances_path` is on different disk partition
than the image cache directory then the driver will not reserve resource for
the cache.
Such disk reservation is done by a periodic task in the resource tracker that
runs every :oslo.config:option:`update_resources_interval` seconds. So the
reservation is not updated immediately when an image is cached.
Related options:
* :oslo.config:option:`DEFAULT.instances_path`
* :oslo.config:option:`image_cache.subdirectory_name`
* :oslo.config:option:`update_resources_interval`
"""), """),
] ]

View File

@ -20999,6 +20999,33 @@ class TestUpdateProviderTree(test.NoDBTestCase):
self.assertEqual(expected_inventory, self.pt.data(cn.uuid).inventory) self.assertEqual(expected_inventory, self.pt.data(cn.uuid).inventory)
self.assertEqual(expected_allocations, allocations) self.assertEqual(expected_allocations, allocations)
@mock.patch('nova.virt.libvirt.imagecache.ImageCacheManager.'
'get_disk_usage', return_value=1000)
def test_image_cache_disk_reservation(self, mock_cache_manager):
self.flags(reserved_host_disk_mb=1024)
self.flags(group='workarounds',
reserve_disk_resource_for_image_cache=False)
self.driver.update_provider_tree(self.pt, self.cn_rp['name'])
disk_reservation = self.pt.data(
self.cn_rp['uuid']).inventory['DISK_GB']['reserved']
# Only the reserved_host_disk_mb is counted
self.assertEqual(1, disk_reservation)
# Turn the cache reservation on
self.flags(group='workarounds',
reserve_disk_resource_for_image_cache=True)
self.driver.update_provider_tree(self.pt, self.cn_rp['name'])
disk_reservation = self.pt.data(
self.cn_rp['uuid']).inventory['DISK_GB']['reserved']
# Now both the reserved_host_disk_mb and the cache size is counted
# Note that the mock returns bytes which is rounded up to the next GB
self.assertEqual(2, disk_reservation)
class TraitsComparisonMixin(object): class TraitsComparisonMixin(object):

View File

@ -672,3 +672,64 @@ class ImageCacheManagerTestCase(test.NoDBTestCase):
remove_lock=False) remove_lock=False)
mock_synchronized.assert_called_once_with(lock_file, external=True, mock_synchronized.assert_called_once_with(lock_file, external=True,
lock_path=lock_path) lock_path=lock_path)
@mock.patch('os.stat')
def test_cache_dir_is_on_same_dev_as_instances_dir(self, mock_stat):
mock_stat.side_effect = [mock.Mock(st_dev=0), mock.Mock(st_dev=1)]
manager = imagecache.ImageCacheManager()
self.assertFalse(manager.cache_dir_is_on_same_dev_as_instances_dir)
mock_stat.assert_has_calls([
mock.call(CONF.instances_path),
mock.call(
os.path.join(
CONF.instances_path,
CONF.image_cache.subdirectory_name))])
mock_stat.reset_mock()
mock_stat.side_effect = [mock.Mock(st_dev=0), mock.Mock(st_dev=0)]
self.assertTrue(manager.cache_dir_is_on_same_dev_as_instances_dir)
@mock.patch('nova.virt.libvirt.imagecache.ImageCacheManager.'
'cache_dir_is_on_same_dev_as_instances_dir',
new=mock.PropertyMock(return_value=False))
def test_get_disk_usage_cache_is_on_different_disk(self):
manager = imagecache.ImageCacheManager()
self.assertEqual(0, manager.get_disk_usage())
@mock.patch('os.listdir')
@mock.patch('nova.virt.libvirt.imagecache.ImageCacheManager.'
'cache_dir_is_on_same_dev_as_instances_dir',
new=mock.PropertyMock(return_value=True))
def test_get_disk_usage_empty(self, mock_listdir):
mock_listdir.return_value = []
manager = imagecache.ImageCacheManager()
self.assertEqual(0, manager.get_disk_usage())
@mock.patch('os.path.isfile')
@mock.patch('os.listdir')
@mock.patch('os.stat')
@mock.patch('nova.virt.libvirt.imagecache.ImageCacheManager.'
'cache_dir_is_on_same_dev_as_instances_dir',
new=mock.PropertyMock(return_value=True))
def test_get_disk_usage(self, mock_stat, mock_listdir, mock_isfile):
mock_stat.side_effect = [
# stat calls on each file in the cache directory to get the size
mock.Mock(st_blocks=10), # foo
mock.Mock(st_blocks=20), # bar
]
mock_listdir.return_value = ['foo', 'bar', 'some-dir']
mock_isfile.side_effect = [
True, # foo
True, # bar
False, # some-dir
]
manager = imagecache.ImageCacheManager()
# size is calculated from as st_blocks * 512
self.assertEqual(10 * 512 + 20 * 512, manager.get_disk_usage())

View File

@ -108,3 +108,12 @@ class ImageCacheManager(object):
populated in the cached stats will be used for the cache management. populated in the cached stats will be used for the cache management.
""" """
raise NotImplementedError() raise NotImplementedError()
def get_disk_usage(self):
"""Return the size of the physical disk space used for the cache.
:returns: The disk space in bytes that is occupied from
CONF.instances_path or zero if the cache directory is mounted
to a different disk device.
"""
raise NotImplementedError()

View File

@ -7560,7 +7560,8 @@ class LibvirtDriver(driver.ComputeDriver):
'max_unit': disk_gb, 'max_unit': disk_gb,
'step_size': 1, 'step_size': 1,
'allocation_ratio': ratios[orc.DISK_GB], 'allocation_ratio': ratios[orc.DISK_GB],
'reserved': self._get_reserved_host_disk_gb_from_config(), 'reserved': (self._get_reserved_host_disk_gb_from_config() +
self._get_disk_size_reserved_for_image_cache()),
} }
# TODO(sbauza): Use traits to providing vGPU types. For the moment, # TODO(sbauza): Use traits to providing vGPU types. For the moment,
@ -9977,6 +9978,18 @@ class LibvirtDriver(driver.ComputeDriver):
images.fetch_to_raw(context, image_id, path) images.fetch_to_raw(context, image_id, path)
return True return True
def _get_disk_size_reserved_for_image_cache(self):
"""Return the amount of DISK_GB resource need to be reserved for the
image cache.
:returns: The disk space in GB
"""
if not CONF.workarounds.reserve_disk_resource_for_image_cache:
return 0
return compute_utils.convert_mb_to_ceil_gb(
self.image_cache_manager.get_disk_usage() / 1024.0 / 1024.0)
def _is_path_shared_with(self, dest, path): def _is_path_shared_with(self, dest, path):
# NOTE (rmk): There are two methods of determining whether we are # NOTE (rmk): There are two methods of determining whether we are
# on the same filesystem: the source and dest IP are the # on the same filesystem: the source and dest IP are the

View File

@ -354,3 +354,28 @@ class ImageCacheManager(imagecache.ImageCacheManager):
# perform the aging and image verification # perform the aging and image verification
self._age_and_verify_cached_images(context, all_instances, base_dir) self._age_and_verify_cached_images(context, all_instances, base_dir)
self._age_and_verify_swap_images(context, base_dir) self._age_and_verify_swap_images(context, base_dir)
def get_disk_usage(self):
if not self.cache_dir_is_on_same_dev_as_instances_dir:
return 0
# NOTE(gibi): we need to use the disk size occupied from the file
# system as images in the cache will not grow to their virtual size.
# NOTE(gibi): st.blocks is always measured in 512 byte blocks see man
# fstat
return sum(
os.stat(os.path.join(self.cache_dir, f)).st_blocks * 512
for f in os.listdir(self.cache_dir)
if os.path.isfile(os.path.join(self.cache_dir, f)))
@property
def cache_dir(self):
return os.path.join(
CONF.instances_path, CONF.image_cache.subdirectory_name)
@property
def cache_dir_is_on_same_dev_as_instances_dir(self):
# NOTE(gibi): this does not work on Windows properly as st_dev is
# always 0
return (os.stat(CONF.instances_path).st_dev ==
os.stat(self.cache_dir).st_dev)

View File

@ -0,0 +1,10 @@
---
fixes:
- |
A new ``[workarounds]/reserve_disk_resource_for_image_cache`` config
option was added to fix the `bug 1878024`_ where the images in the compute
image cache overallocate the local disk. If this new config is set then the
libvirt driver will reserve DISK_GB resources in placement based on the
actual disk usage of the image cache.
.. _bug 1878024: https://bugs.launchpad.net/nova/+bug/1878024