diff --git a/cinder/tests/unit/volume/drivers/netapp/dataontap/test_nfs_base.py b/cinder/tests/unit/volume/drivers/netapp/dataontap/test_nfs_base.py index 550da728a86..9c6e686cd91 100644 --- a/cinder/tests/unit/volume/drivers/netapp/dataontap/test_nfs_base.py +++ b/cinder/tests/unit/volume/drivers/netapp/dataontap/test_nfs_base.py @@ -16,7 +16,6 @@ """Unit tests for the NetApp NFS storage driver.""" import copy import os -import threading import time from unittest import mock @@ -620,36 +619,6 @@ class NetAppNfsDriverTestCase(test.TestCase): os.path.exists.assert_called_once_with( 'dir/' + fake.CLONE_DESTINATION_NAME) - def test__spawn_clean_cache_job_clean_job_setup(self): - self.driver.cleaning = True - mock_debug_log = self.mock_object(nfs_base.LOG, 'debug') - self.mock_object(utils, 'synchronized', return_value=lambda f: f) - - retval = self.driver._spawn_clean_cache_job() - - self.assertIsNone(retval) - self.assertEqual(1, mock_debug_log.call_count) - - def test__spawn_clean_cache_job_new_clean_job(self): - - class FakeTimer(object): - def start(self): - pass - - fake_timer = FakeTimer() - self.mock_object(utils, 'synchronized', return_value=lambda f: f) - self.mock_object(fake_timer, 'start') - self.mock_object(nfs_base.LOG, 'debug') - self.mock_object(self.driver, '_clean_image_cache') - self.mock_object(threading, 'Timer', return_value=fake_timer) - - retval = self.driver._spawn_clean_cache_job() - - self.assertIsNone(retval) - threading.Timer.assert_called_once_with( - 0, self.driver._clean_image_cache) - fake_timer.start.assert_called_once_with() - def test_cleanup_volume_on_failure(self): path = '%s/%s' % (fake.NFS_SHARE, fake.NFS_VOLUME['name']) mock_local_path = self.mock_object(self.driver, 'local_path') @@ -1077,13 +1046,22 @@ class NetAppNfsDriverTestCase(test.TestCase): self.driver, '_delete_snapshots_marked_for_deletion') mock_call_ems_logging = self.mock_object( self.driver, '_handle_ems_logging') + mock_call_clean_image_cache = self.mock_object( + self.driver, '_clean_image_cache') + + # image cache cleanup task can be configured with custom timeout + cache_cleanup_interval = loopingcalls.ONE_HOUR + self.driver.configuration.netapp_nfs_image_cache_cleanup_interval = ( + cache_cleanup_interval) self.driver._add_looping_tasks() mock_add_task.assert_has_calls([ mock.call(mock_call_snap_cleanup, loopingcalls.ONE_MINUTE, loopingcalls.ONE_MINUTE), - mock.call(mock_call_ems_logging, loopingcalls.ONE_HOUR)]) + mock.call(mock_call_ems_logging, loopingcalls.ONE_HOUR), + mock.call(mock_call_clean_image_cache, cache_cleanup_interval) + ]) def test__clone_from_cache(self): image_id = 'fake_image_id' diff --git a/cinder/tests/unit/volume/drivers/netapp/dataontap/test_nfs_cmode.py b/cinder/tests/unit/volume/drivers/netapp/dataontap/test_nfs_cmode.py index d443234c544..308e22c049c 100644 --- a/cinder/tests/unit/volume/drivers/netapp/dataontap/test_nfs_cmode.py +++ b/cinder/tests/unit/volume/drivers/netapp/dataontap/test_nfs_cmode.py @@ -137,7 +137,6 @@ class NetAppCmodeNfsDriverTestCase(test.TestCase): mock_debug_log = self.mock_object(nfs_cmode.LOG, 'debug') self.mock_object(self.driver, 'get_filter_function') self.mock_object(self.driver, 'get_goodness_function') - self.mock_object(self.driver, '_spawn_clean_cache_job') self.driver.zapi_client = mock.Mock() self.mock_object(self.driver, '_get_pool_stats', return_value={}) expected_stats = { @@ -153,7 +152,6 @@ class NetAppCmodeNfsDriverTestCase(test.TestCase): retval = self.driver._update_volume_stats() self.assertIsNone(retval) - self.assertTrue(self.driver._spawn_clean_cache_job.called) self.assertEqual(1, mock_debug_log.call_count) self.assertEqual(expected_stats, self.driver._stats) diff --git a/cinder/volume/drivers/netapp/dataontap/nfs_base.py b/cinder/volume/drivers/netapp/dataontap/nfs_base.py index 3b8a32266d2..5f913584f91 100644 --- a/cinder/volume/drivers/netapp/dataontap/nfs_base.py +++ b/cinder/volume/drivers/netapp/dataontap/nfs_base.py @@ -25,7 +25,6 @@ import copy import math import os import re -import threading import time from oslo_concurrency import processutils @@ -115,6 +114,13 @@ class NetAppNfsDriver(driver.ManageableVD, self._handle_ems_logging, loopingcalls.ONE_HOUR) + # Add the task that periodically cleanup old expired internal + # image caching. + self.loopingcalls.add_task( + self._clean_image_cache, + self.configuration.netapp_nfs_image_cache_cleanup_interval + ) + def _delete_snapshots_marked_for_deletion(self): snapshots = self.zapi_client.get_snapshots_marked_for_deletion() for snapshot in snapshots: @@ -546,49 +552,32 @@ class NetAppNfsDriver(driver.ManageableVD, os.utime(src_path, None) _do_clone() - @utils.synchronized('clean_cache') - def _spawn_clean_cache_job(self): - """Spawns a clean task if not running.""" - if getattr(self, 'cleaning', None): - LOG.debug('Image cache cleaning in progress. Returning... ') - return - else: - # Set cleaning to True - self.cleaning = True - t = threading.Timer(0, self._clean_image_cache) - t.start() - def _clean_image_cache(self): """Clean the image cache files in cache of space crunch.""" - try: - LOG.debug('Image cache cleaning in progress.') - thres_size_perc_start = ( - self.configuration.thres_avl_size_perc_start) - thres_size_perc_stop = self.configuration.thres_avl_size_perc_stop - for share in getattr(self, '_mounted_shares', []): - try: - total_size, total_avl = self._get_capacity_info(share) - avl_percent = int((float(total_avl) / total_size) * 100) - if avl_percent <= thres_size_perc_start: - LOG.info('Cleaning cache for share %s.', share) - eligible_files = self._find_old_cache_files(share) - threshold_size = int( - (thres_size_perc_stop * total_size) / 100) - bytes_to_free = int(threshold_size - total_avl) - LOG.debug('Files to be queued for deletion %s', - eligible_files) - self._delete_files_till_bytes_free( - eligible_files, share, bytes_to_free) - else: - continue - except Exception as e: - LOG.warning('Exception during cache cleaning' - ' %(share)s. Message - %(ex)s', - {'share': share, 'ex': e}) + LOG.debug('Image cache cleaning in progress.') + thres_size_perc_start = ( + self.configuration.thres_avl_size_perc_start) + thres_size_perc_stop = self.configuration.thres_avl_size_perc_stop + for share in self._mounted_shares: + try: + total_size, total_avl = self._get_capacity_info(share) + avl_percent = int((float(total_avl) / total_size) * 100) + if avl_percent <= thres_size_perc_start: + LOG.info('Cleaning cache for share %s.', share) + eligible_files = self._find_old_cache_files(share) + threshold_size = int( + (thres_size_perc_stop * total_size) / 100) + bytes_to_free = int(threshold_size - total_avl) + LOG.debug('Files to be queued for deletion %s', + eligible_files) + self._delete_files_till_bytes_free( + eligible_files, share, bytes_to_free) + else: continue - finally: - LOG.debug('Image cache cleaning done.') - self.cleaning = False + except Exception as e: + LOG.warning('Exception during cache cleaning' + ' %(share)s. Message - %(ex)s', + {'share': share, 'ex': e}) def _shortlist_del_eligible_files(self, share, old_files): """Prepares list of eligible files to be deleted from cache.""" diff --git a/cinder/volume/drivers/netapp/dataontap/nfs_cmode.py b/cinder/volume/drivers/netapp/dataontap/nfs_cmode.py index 0cccbef649f..c383db6bb3f 100644 --- a/cinder/volume/drivers/netapp/dataontap/nfs_cmode.py +++ b/cinder/volume/drivers/netapp/dataontap/nfs_cmode.py @@ -337,7 +337,6 @@ class NetAppCmodeNfsDriver(nfs_base.NetAppNfsDriver, # Used for service state report data['replication_enabled'] = self.replication_enabled - self._spawn_clean_cache_job() self._stats = data def _get_pool_stats(self, filter_function=None, goodness_function=None): diff --git a/cinder/volume/drivers/netapp/options.py b/cinder/volume/drivers/netapp/options.py index c32bd27990a..a8739a3cbf4 100644 --- a/cinder/volume/drivers/netapp/options.py +++ b/cinder/volume/drivers/netapp/options.py @@ -102,6 +102,11 @@ netapp_cluster_opts = [ 'provisioning of block storage volumes should occur.')), ] netapp_img_cache_opts = [ + cfg.IntOpt('netapp_nfs_image_cache_cleanup_interval', + default=600, + min=60, + help=('Sets time in seconds between NFS image cache ' + 'cleanup tasks.')), cfg.IntOpt('thres_avl_size_perc_start', default=20, help=('If the percentage of available space for an NFS share ' diff --git a/releasenotes/notes/bug-2008017-netapp-fix-native-threads-04d8f58f4c29b03d.yaml b/releasenotes/notes/bug-2008017-netapp-fix-native-threads-04d8f58f4c29b03d.yaml new file mode 100644 index 00000000000..c6b05987df6 --- /dev/null +++ b/releasenotes/notes/bug-2008017-netapp-fix-native-threads-04d8f58f4c29b03d.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + `Bug #2008017 `_: Fixed + NetApp NFS driver to never spawn a native thread avoid thread starvation + and other related issues.