From 89fe504abfbd41a9c37f9b544c0ce98b23b45799 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Fri, 15 May 2020 15:00:20 +0200 Subject: [PATCH] Reserve DISK_GB resource for the image cache If the nova compute image cache is on the same disk as the instance directory then the images in the cache will consume the same disk resource as the nova instances which can lead to disk overallocation. This patch adds a new config option [workarounds]/reserve_disk_resource_for_image_cache . If it is set the libvirt driver will reserve DISK_GB resources in placement for the actual size of the image cache. This is a low complexity solution with known limitations: * Reservation is updated long after the image is downloaded * This code allows the reservation to push the provider into negative available resource if the reservation + allocations exceed the total inventory. Change-Id: If874f018ea996587e178219569c2903c2ee923cf Closes-Bug: #1878024 --- nova/conf/workarounds.py | 19 ++++++ nova/tests/unit/virt/libvirt/test_driver.py | 27 ++++++++ .../unit/virt/libvirt/test_imagecache.py | 61 +++++++++++++++++++ nova/virt/imagecache.py | 9 +++ nova/virt/libvirt/driver.py | 15 ++++- nova/virt/libvirt/imagecache.py | 25 ++++++++ ...disk-for-image-cache-ef6688f869b12bcb.yaml | 10 +++ 7 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/bug-1878024-reserve-disk-for-image-cache-ef6688f869b12bcb.yaml diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py index b7e30bb25800..8eadc0b6ec3a 100644 --- a/nova/conf/workarounds.py +++ b/nova/conf/workarounds.py @@ -326,6 +326,25 @@ Related options: * ``compute_driver`` (libvirt) * ``disable_qemu_native_luksv1`` (workarounds) +"""), + cfg.BoolOpt('reserve_disk_resource_for_image_cache', + default=False, + help=""" +If it is set to True then the libvirt driver will reserve DISK_GB resource for +the images stored in the image cache. If the +:oslo.config:option:`DEFAULT.instances_path` is on different disk partition +than the image cache directory then the driver will not reserve resource for +the cache. + +Such disk reservation is done by a periodic task in the resource tracker that +runs every :oslo.config:option:`update_resources_interval` seconds. So the +reservation is not updated immediately when an image is cached. + +Related options: + +* :oslo.config:option:`DEFAULT.instances_path` +* :oslo.config:option:`image_cache.subdirectory_name` +* :oslo.config:option:`update_resources_interval` """), ] diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 63d34e743be7..29a0549ab0ba 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -20999,6 +20999,33 @@ class TestUpdateProviderTree(test.NoDBTestCase): self.assertEqual(expected_inventory, self.pt.data(cn.uuid).inventory) self.assertEqual(expected_allocations, allocations) + @mock.patch('nova.virt.libvirt.imagecache.ImageCacheManager.' + 'get_disk_usage', return_value=1000) + def test_image_cache_disk_reservation(self, mock_cache_manager): + self.flags(reserved_host_disk_mb=1024) + + self.flags(group='workarounds', + reserve_disk_resource_for_image_cache=False) + + self.driver.update_provider_tree(self.pt, self.cn_rp['name']) + + disk_reservation = self.pt.data( + self.cn_rp['uuid']).inventory['DISK_GB']['reserved'] + # Only the reserved_host_disk_mb is counted + self.assertEqual(1, disk_reservation) + + # Turn the cache reservation on + self.flags(group='workarounds', + reserve_disk_resource_for_image_cache=True) + + self.driver.update_provider_tree(self.pt, self.cn_rp['name']) + + disk_reservation = self.pt.data( + self.cn_rp['uuid']).inventory['DISK_GB']['reserved'] + # Now both the reserved_host_disk_mb and the cache size is counted + # Note that the mock returns bytes which is rounded up to the next GB + self.assertEqual(2, disk_reservation) + class TraitsComparisonMixin(object): diff --git a/nova/tests/unit/virt/libvirt/test_imagecache.py b/nova/tests/unit/virt/libvirt/test_imagecache.py index 4310cff43762..4b7ff59f2a17 100644 --- a/nova/tests/unit/virt/libvirt/test_imagecache.py +++ b/nova/tests/unit/virt/libvirt/test_imagecache.py @@ -672,3 +672,64 @@ class ImageCacheManagerTestCase(test.NoDBTestCase): remove_lock=False) mock_synchronized.assert_called_once_with(lock_file, external=True, lock_path=lock_path) + + @mock.patch('os.stat') + def test_cache_dir_is_on_same_dev_as_instances_dir(self, mock_stat): + mock_stat.side_effect = [mock.Mock(st_dev=0), mock.Mock(st_dev=1)] + + manager = imagecache.ImageCacheManager() + + self.assertFalse(manager.cache_dir_is_on_same_dev_as_instances_dir) + mock_stat.assert_has_calls([ + mock.call(CONF.instances_path), + mock.call( + os.path.join( + CONF.instances_path, + CONF.image_cache.subdirectory_name))]) + + mock_stat.reset_mock() + mock_stat.side_effect = [mock.Mock(st_dev=0), mock.Mock(st_dev=0)] + self.assertTrue(manager.cache_dir_is_on_same_dev_as_instances_dir) + + @mock.patch('nova.virt.libvirt.imagecache.ImageCacheManager.' + 'cache_dir_is_on_same_dev_as_instances_dir', + new=mock.PropertyMock(return_value=False)) + def test_get_disk_usage_cache_is_on_different_disk(self): + manager = imagecache.ImageCacheManager() + + self.assertEqual(0, manager.get_disk_usage()) + + @mock.patch('os.listdir') + @mock.patch('nova.virt.libvirt.imagecache.ImageCacheManager.' + 'cache_dir_is_on_same_dev_as_instances_dir', + new=mock.PropertyMock(return_value=True)) + def test_get_disk_usage_empty(self, mock_listdir): + mock_listdir.return_value = [] + + manager = imagecache.ImageCacheManager() + + self.assertEqual(0, manager.get_disk_usage()) + + @mock.patch('os.path.isfile') + @mock.patch('os.listdir') + @mock.patch('os.stat') + @mock.patch('nova.virt.libvirt.imagecache.ImageCacheManager.' + 'cache_dir_is_on_same_dev_as_instances_dir', + new=mock.PropertyMock(return_value=True)) + def test_get_disk_usage(self, mock_stat, mock_listdir, mock_isfile): + mock_stat.side_effect = [ + # stat calls on each file in the cache directory to get the size + mock.Mock(st_blocks=10), # foo + mock.Mock(st_blocks=20), # bar + ] + mock_listdir.return_value = ['foo', 'bar', 'some-dir'] + mock_isfile.side_effect = [ + True, # foo + True, # bar + False, # some-dir + ] + + manager = imagecache.ImageCacheManager() + + # size is calculated from as st_blocks * 512 + self.assertEqual(10 * 512 + 20 * 512, manager.get_disk_usage()) diff --git a/nova/virt/imagecache.py b/nova/virt/imagecache.py index 4338a2510a74..9bda89e46519 100644 --- a/nova/virt/imagecache.py +++ b/nova/virt/imagecache.py @@ -108,3 +108,12 @@ class ImageCacheManager(object): populated in the cached stats will be used for the cache management. """ raise NotImplementedError() + + def get_disk_usage(self): + """Return the size of the physical disk space used for the cache. + + :returns: The disk space in bytes that is occupied from + CONF.instances_path or zero if the cache directory is mounted + to a different disk device. + """ + raise NotImplementedError() diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index e60e08cf3216..72ae3179d0df 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -7560,7 +7560,8 @@ class LibvirtDriver(driver.ComputeDriver): 'max_unit': disk_gb, 'step_size': 1, 'allocation_ratio': ratios[orc.DISK_GB], - 'reserved': self._get_reserved_host_disk_gb_from_config(), + 'reserved': (self._get_reserved_host_disk_gb_from_config() + + self._get_disk_size_reserved_for_image_cache()), } # TODO(sbauza): Use traits to providing vGPU types. For the moment, @@ -9977,6 +9978,18 @@ class LibvirtDriver(driver.ComputeDriver): images.fetch_to_raw(context, image_id, path) return True + def _get_disk_size_reserved_for_image_cache(self): + """Return the amount of DISK_GB resource need to be reserved for the + image cache. + + :returns: The disk space in GB + """ + if not CONF.workarounds.reserve_disk_resource_for_image_cache: + return 0 + + return compute_utils.convert_mb_to_ceil_gb( + self.image_cache_manager.get_disk_usage() / 1024.0 / 1024.0) + def _is_path_shared_with(self, dest, path): # NOTE (rmk): There are two methods of determining whether we are # on the same filesystem: the source and dest IP are the diff --git a/nova/virt/libvirt/imagecache.py b/nova/virt/libvirt/imagecache.py index 4cd1c30012c0..780afa91a39a 100644 --- a/nova/virt/libvirt/imagecache.py +++ b/nova/virt/libvirt/imagecache.py @@ -354,3 +354,28 @@ class ImageCacheManager(imagecache.ImageCacheManager): # perform the aging and image verification self._age_and_verify_cached_images(context, all_instances, base_dir) self._age_and_verify_swap_images(context, base_dir) + + def get_disk_usage(self): + if not self.cache_dir_is_on_same_dev_as_instances_dir: + return 0 + + # NOTE(gibi): we need to use the disk size occupied from the file + # system as images in the cache will not grow to their virtual size. + # NOTE(gibi): st.blocks is always measured in 512 byte blocks see man + # fstat + return sum( + os.stat(os.path.join(self.cache_dir, f)).st_blocks * 512 + for f in os.listdir(self.cache_dir) + if os.path.isfile(os.path.join(self.cache_dir, f))) + + @property + def cache_dir(self): + return os.path.join( + CONF.instances_path, CONF.image_cache.subdirectory_name) + + @property + def cache_dir_is_on_same_dev_as_instances_dir(self): + # NOTE(gibi): this does not work on Windows properly as st_dev is + # always 0 + return (os.stat(CONF.instances_path).st_dev == + os.stat(self.cache_dir).st_dev) diff --git a/releasenotes/notes/bug-1878024-reserve-disk-for-image-cache-ef6688f869b12bcb.yaml b/releasenotes/notes/bug-1878024-reserve-disk-for-image-cache-ef6688f869b12bcb.yaml new file mode 100644 index 000000000000..10d76629f69d --- /dev/null +++ b/releasenotes/notes/bug-1878024-reserve-disk-for-image-cache-ef6688f869b12bcb.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + A new ``[workarounds]/reserve_disk_resource_for_image_cache`` config + option was added to fix the `bug 1878024`_ where the images in the compute + image cache overallocate the local disk. If this new config is set then the + libvirt driver will reserve DISK_GB resources in placement based on the + actual disk usage of the image cache. + + .. _bug 1878024: https://bugs.launchpad.net/nova/+bug/1878024