From 685e4c98eef0fbf73d1408d50383cfdaca583dcb Mon Sep 17 00:00:00 2001 From: Clinton Knight Date: Sun, 28 Aug 2016 22:12:25 -0400 Subject: [PATCH] NetApp: Report shared blocks exhaustion The maximum amount of shared (deduplicated, cloned) data on a Data ONTAP FlexVol (i.e. a Cinder pool) is 640TB. The only thing more surprising about that number is that we have customers hitting it. The symptom is that operations such as cloning Cinder volumes fail because no more blocks may be shared. The fix is to report the level of consumption to the scheduler for optional incorporation into the filter & goodness functions, so that pools nearing the shared block limit may be shielded from further provisioning requests. Implements: blueprint netapp-cdot-report-shared-blocks-exhaustion Change-Id: I01b7322f7ddb05ee5e28bcb1121a90a6ea307720 --- .../drivers/netapp/dataontap/client/fakes.py | 44 +++++++++ .../dataontap/client/test_client_cmode.py | 96 ++++++++++++++++++- .../netapp/dataontap/test_block_cmode.py | 10 +- .../netapp/dataontap/test_nfs_cmode.py | 4 + .../drivers/netapp/dataontap/block_cmode.py | 5 + .../netapp/dataontap/client/client_cmode.py | 62 +++++++++++- .../drivers/netapp/dataontap/nfs_cmode.py | 5 + ...ed_blocks_exhaustion-073a73e05daf09d4.yaml | 9 ++ 8 files changed, 227 insertions(+), 8 deletions(-) create mode 100644 releasenotes/notes/netapp_cdot_report_shared_blocks_exhaustion-073a73e05daf09d4.yaml diff --git a/cinder/tests/unit/volume/drivers/netapp/dataontap/client/fakes.py b/cinder/tests/unit/volume/drivers/netapp/dataontap/client/fakes.py index 2d64e62fdc2..f1927269da0 100644 --- a/cinder/tests/unit/volume/drivers/netapp/dataontap/client/fakes.py +++ b/cinder/tests/unit/volume/drivers/netapp/dataontap/client/fakes.py @@ -800,6 +800,8 @@ SIS_GET_ITER_SSC_RESPONSE = etree.XML(""" false enabled + 211106232532992 + 703687441776640 1 @@ -809,8 +811,50 @@ SIS_GET_ITER_SSC_RESPONSE = etree.XML(""" VOLUME_DEDUPE_INFO_SSC = { 'compression': False, 'dedupe': True, 
+ 'logical-data-size': 211106232532992, + 'logical-data-limit': 703687441776640, } +SIS_GET_ITER_SSC_NO_LOGICAL_DATA_RESPONSE = etree.XML(""" + + + + false + disabled + + + 1 + +""") + +VOLUME_DEDUPE_INFO_SSC_NO_LOGICAL_DATA = { + 'compression': False, + 'dedupe': False, + 'logical-data-size': 0, + 'logical-data-limit': 1, +} + +CLONE_SPLIT_STATUS_RESPONSE = etree.XML(""" + + + 1234 + 316659348799488 + + +""") + +VOLUME_CLONE_SPLIT_STATUS = { + 'unsplit-size': 316659348799488, + 'unsplit-clone-count': 1234, +} + +CLONE_SPLIT_STATUS_NO_DATA_RESPONSE = etree.XML(""" + + + + +""") + STORAGE_DISK_GET_ITER_RESPONSE_PAGE_1 = etree.XML(""" diff --git a/cinder/tests/unit/volume/drivers/netapp/dataontap/client/test_client_cmode.py b/cinder/tests/unit/volume/drivers/netapp/dataontap/client/test_client_cmode.py index e9fe65d103e..e2463b35242 100644 --- a/cinder/tests/unit/volume/drivers/netapp/dataontap/client/test_client_cmode.py +++ b/cinder/tests/unit/volume/drivers/netapp/dataontap/client/test_client_cmode.py @@ -1638,6 +1638,8 @@ class NetAppCmodeClientTestCase(test.TestCase): 'sis-status-info': { 'state': None, 'is-compression-enabled': None, + 'logical-data-size': None, + 'logical-data-limit': None, }, }, } @@ -1645,6 +1647,20 @@ class NetAppCmodeClientTestCase(test.TestCase): 'sis-get-iter', sis_get_iter_args) self.assertEqual(fake_client.VOLUME_DEDUPE_INFO_SSC, result) + def test_get_flexvol_dedupe_info_no_logical_data_values(self): + + api_response = netapp_api.NaElement( + fake_client.SIS_GET_ITER_SSC_NO_LOGICAL_DATA_RESPONSE) + self.mock_object(self.client, + 'send_iter_request', + mock.Mock(return_value=api_response)) + + result = self.client.get_flexvol_dedupe_info( + fake_client.VOLUME_NAMES[0]) + + self.assertEqual(fake_client.VOLUME_DEDUPE_INFO_SSC_NO_LOGICAL_DATA, + result) + def test_get_flexvol_dedupe_info_not_found(self): api_response = netapp_api.NaElement( @@ -1656,8 +1672,8 @@ class NetAppCmodeClientTestCase(test.TestCase): result = 
self.client.get_flexvol_dedupe_info( fake_client.VOLUME_NAMES[0]) - expected = {'compression': False, 'dedupe': False} - self.assertEqual(expected, result) + self.assertEqual(fake_client.VOLUME_DEDUPE_INFO_SSC_NO_LOGICAL_DATA, + result) def test_get_flexvol_dedupe_info_api_error(self): @@ -1668,7 +1684,81 @@ class NetAppCmodeClientTestCase(test.TestCase): result = self.client.get_flexvol_dedupe_info( fake_client.VOLUME_NAMES[0]) - expected = {'compression': False, 'dedupe': False} + self.assertEqual(fake_client.VOLUME_DEDUPE_INFO_SSC_NO_LOGICAL_DATA, + result) + + def test_get_flexvol_dedupe_used_percent(self): + + self.client.features.add_feature('CLONE_SPLIT_STATUS') + mock_get_flexvol_dedupe_info = self.mock_object( + self.client, 'get_flexvol_dedupe_info', + mock.Mock(return_value=fake_client.VOLUME_DEDUPE_INFO_SSC)) + mock_get_clone_split_info = self.mock_object( + self.client, 'get_clone_split_info', + mock.Mock(return_value=fake_client.VOLUME_CLONE_SPLIT_STATUS)) + + result = self.client.get_flexvol_dedupe_used_percent( + fake_client.VOLUME_NAMES[0]) + + self.assertEqual(75.0, result) + mock_get_flexvol_dedupe_info.assert_called_once_with( + fake_client.VOLUME_NAMES[0]) + mock_get_clone_split_info.assert_called_once_with( + fake_client.VOLUME_NAMES[0]) + + def test_get_flexvol_dedupe_used_percent_not_supported(self): + + self.client.features.add_feature('CLONE_SPLIT_STATUS', supported=False) + mock_get_flexvol_dedupe_info = self.mock_object( + self.client, 'get_flexvol_dedupe_info', + mock.Mock(return_value=fake_client.VOLUME_DEDUPE_INFO_SSC)) + mock_get_clone_split_info = self.mock_object( + self.client, 'get_clone_split_info', + mock.Mock(return_value=fake_client.VOLUME_CLONE_SPLIT_STATUS)) + + result = self.client.get_flexvol_dedupe_used_percent( + fake_client.VOLUME_NAMES[0]) + + self.assertEqual(0.0, result) + self.assertFalse(mock_get_flexvol_dedupe_info.called) + self.assertFalse(mock_get_clone_split_info.called) + + def 
test_get_clone_split_info(self): + + api_response = netapp_api.NaElement( + fake_client.CLONE_SPLIT_STATUS_RESPONSE) + self.mock_object(self.client, + 'send_request', + mock.Mock(return_value=api_response)) + + result = self.client.get_clone_split_info(fake_client.VOLUME_NAMES[0]) + + self.assertEqual(fake_client.VOLUME_CLONE_SPLIT_STATUS, result) + self.client.send_request.assert_called_once_with( + 'clone-split-status', {'volume-name': fake_client.VOLUME_NAMES[0]}) + + def test_get_clone_split_info_api_error(self): + + self.mock_object(self.client, + 'send_request', + mock.Mock(side_effect=self._mock_api_error())) + + result = self.client.get_clone_split_info(fake_client.VOLUME_NAMES[0]) + + expected = {'unsplit-size': 0, 'unsplit-clone-count': 0} + self.assertEqual(expected, result) + + def test_get_clone_split_info_no_data(self): + + api_response = netapp_api.NaElement( + fake_client.CLONE_SPLIT_STATUS_NO_DATA_RESPONSE) + self.mock_object(self.client, + 'send_request', + mock.Mock(return_value=api_response)) + + result = self.client.get_clone_split_info(fake_client.VOLUME_NAMES[0]) + + expected = {'unsplit-size': 0, 'unsplit-clone-count': 0} self.assertEqual(expected, result) def test_is_flexvol_mirrored(self): diff --git a/cinder/tests/unit/volume/drivers/netapp/dataontap/test_block_cmode.py b/cinder/tests/unit/volume/drivers/netapp/dataontap/test_block_cmode.py index 5ee7cee764a..f8106f26e9c 100644 --- a/cinder/tests/unit/volume/drivers/netapp/dataontap/test_block_cmode.py +++ b/cinder/tests/unit/volume/drivers/netapp/dataontap/test_block_cmode.py @@ -373,9 +373,12 @@ class NetAppBlockStorageCmodeLibraryTestCase(test.TestCase): 'size-total': 10737418240.0, 'size-available': 2147483648.0, } - self.mock_object( - self.zapi_client, 'get_flexvol_capacity', - mock.Mock(return_value=mock_capacities)) + self.mock_object(self.zapi_client, + 'get_flexvol_capacity', + mock.Mock(return_value=mock_capacities)) + self.mock_object(self.zapi_client, + 
'get_flexvol_dedupe_used_percent', + mock.Mock(return_value=55.0)) aggr_capacities = { 'aggr1': { @@ -401,6 +404,7 @@ class NetAppBlockStorageCmodeLibraryTestCase(test.TestCase): 'total_capacity_gb': 10.0, 'free_capacity_gb': 2.0, 'provisioned_capacity_gb': 8.0, + 'netapp_dedupe_used_percent': 55.0, 'netapp_aggregate_used_percent': 45, 'utilization': 30.0, 'filter_function': 'filter', diff --git a/cinder/tests/unit/volume/drivers/netapp/dataontap/test_nfs_cmode.py b/cinder/tests/unit/volume/drivers/netapp/dataontap/test_nfs_cmode.py index 7989cf9b5dd..84ea553a532 100644 --- a/cinder/tests/unit/volume/drivers/netapp/dataontap/test_nfs_cmode.py +++ b/cinder/tests/unit/volume/drivers/netapp/dataontap/test_nfs_cmode.py @@ -183,6 +183,9 @@ class NetAppCmodeNfsDriverTestCase(test.TestCase): self.mock_object(self.driver, '_get_share_capacity_info', mock.Mock(return_value=capacity)) + self.mock_object(self.driver.zapi_client, + 'get_flexvol_dedupe_used_percent', + mock.Mock(return_value=55.0)) aggr_capacities = { 'aggr1': { @@ -210,6 +213,7 @@ class NetAppCmodeNfsDriverTestCase(test.TestCase): 'total_capacity_gb': total_capacity_gb, 'free_capacity_gb': free_capacity_gb, 'provisioned_capacity_gb': provisioned_capacity_gb, + 'netapp_dedupe_used_percent': 55.0, 'netapp_aggregate_used_percent': 45, 'utilization': 30.0, 'filter_function': 'filter', diff --git a/cinder/volume/drivers/netapp/dataontap/block_cmode.py b/cinder/volume/drivers/netapp/dataontap/block_cmode.py index b97739b04f9..392bd54c4e3 100644 --- a/cinder/volume/drivers/netapp/dataontap/block_cmode.py +++ b/cinder/volume/drivers/netapp/dataontap/block_cmode.py @@ -290,6 +290,11 @@ class NetAppBlockStorageCmodeLibrary(block_base.NetAppBlockStorageLibrary, pool['provisioned_capacity_gb'] = round( pool['total_capacity_gb'] - pool['free_capacity_gb'], 2) + dedupe_used = self.zapi_client.get_flexvol_dedupe_used_percent( + ssc_vol_name) + pool['netapp_dedupe_used_percent'] = na_utils.round_down( + dedupe_used) + 
aggregate_name = ssc_vol_info.get('netapp_aggregate') aggr_capacity = aggr_capacities.get(aggregate_name, {}) pool['netapp_aggregate_used_percent'] = aggr_capacity.get( diff --git a/cinder/volume/drivers/netapp/dataontap/client/client_cmode.py b/cinder/volume/drivers/netapp/dataontap/client/client_cmode.py index 1e9464aa89d..83e4ea4fef5 100644 --- a/cinder/volume/drivers/netapp/dataontap/client/client_cmode.py +++ b/cinder/volume/drivers/netapp/dataontap/client/client_cmode.py @@ -66,6 +66,7 @@ class Client(client_base.Client): self.features.add_feature('USER_CAPABILITY_LIST', supported=ontapi_1_20) self.features.add_feature('SYSTEM_METRICS', supported=ontapi_1_2x) + self.features.add_feature('CLONE_SPLIT_STATUS', supported=ontapi_1_30) self.features.add_feature('FAST_CLONE_DELETE', supported=ontapi_1_30) self.features.add_feature('SYSTEM_CONSTITUENT_METRICS', supported=ontapi_1_30) @@ -985,19 +986,28 @@ class Client(client_base.Client): 'sis-status-info': { 'state': None, 'is-compression-enabled': None, + 'logical-data-size': None, + 'logical-data-limit': None, }, }, } + no_dedupe_response = { + 'compression': False, + 'dedupe': False, + 'logical-data-size': 0, + 'logical-data-limit': 1, + } + try: result = self.send_iter_request('sis-get-iter', api_args) except netapp_api.NaApiError: msg = _LE('Failed to get dedupe info for volume %s.') LOG.exception(msg, flexvol_name) - return {'compression': False, 'dedupe': False} + return no_dedupe_response if self._get_record_count(result) != 1: - return {'compression': False, 'dedupe': False} + return no_dedupe_response attributes_list = result.get_child_by_name( 'attributes-list') or netapp_api.NaElement('none') @@ -1005,15 +1015,63 @@ class Client(client_base.Client): sis_status_info = attributes_list.get_child_by_name( 'sis-status-info') or netapp_api.NaElement('none') + logical_data_size = sis_status_info.get_child_content( + 'logical-data-size') or 0 + logical_data_limit = sis_status_info.get_child_content( + 
'logical-data-limit') or 1 + sis = { 'compression': strutils.bool_from_string( sis_status_info.get_child_content('is-compression-enabled')), 'dedupe': na_utils.to_bool( sis_status_info.get_child_content('state')), + 'logical-data-size': int(logical_data_size), + 'logical-data-limit': int(logical_data_limit), } return sis + def get_flexvol_dedupe_used_percent(self, flexvol_name): + """Determine how close a flexvol is to its shared block limit.""" + + # Note(cknight): The value returned by this method is computed from + # values returned by two different APIs, one of which was new in + # Data ONTAP 8.3. + if not self.features.CLONE_SPLIT_STATUS: + return 0.0 + + dedupe_info = self.get_flexvol_dedupe_info(flexvol_name) + clone_split_info = self.get_clone_split_info(flexvol_name) + + total_dedupe_blocks = (dedupe_info.get('logical-data-size') + + clone_split_info.get('unsplit-size')) + dedupe_used_percent = (100.0 * float(total_dedupe_blocks) / + dedupe_info.get('logical-data-limit')) + return dedupe_used_percent + + def get_clone_split_info(self, flexvol_name): + """Get the status of unsplit file/LUN clones in a flexvol.""" + + try: + result = self.send_request('clone-split-status', + {'volume-name': flexvol_name}) + except netapp_api.NaApiError: + msg = _LE('Failed to get clone split info for volume %s.') + LOG.exception(msg, flexvol_name) + return {'unsplit-size': 0, 'unsplit-clone-count': 0} + + clone_split_info = result.get_child_by_name( + 'clone-split-info') or netapp_api.NaElement('none') + + unsplit_size = clone_split_info.get_child_content('unsplit-size') or 0 + unsplit_clone_count = clone_split_info.get_child_content( + 'unsplit-clone-count') or 0 + + return { + 'unsplit-size': int(unsplit_size), + 'unsplit-clone-count': int(unsplit_clone_count), + } + def is_flexvol_mirrored(self, flexvol_name, vserver_name): """Check if flexvol is a SnapMirror source.""" diff --git a/cinder/volume/drivers/netapp/dataontap/nfs_cmode.py 
b/cinder/volume/drivers/netapp/dataontap/nfs_cmode.py index 6e5471e0603..f3134de74d5 100644 --- a/cinder/volume/drivers/netapp/dataontap/nfs_cmode.py +++ b/cinder/volume/drivers/netapp/dataontap/nfs_cmode.py @@ -246,6 +246,11 @@ class NetAppCmodeNfsDriver(nfs_base.NetAppNfsDriver, capacity = self._get_share_capacity_info(nfs_share) pool.update(capacity) + dedupe_used = self.zapi_client.get_flexvol_dedupe_used_percent( + ssc_vol_name) + pool['netapp_dedupe_used_percent'] = na_utils.round_down( + dedupe_used) + aggregate_name = ssc_vol_info.get('netapp_aggregate') aggr_capacity = aggr_capacities.get(aggregate_name, {}) pool['netapp_aggregate_used_percent'] = aggr_capacity.get( diff --git a/releasenotes/notes/netapp_cdot_report_shared_blocks_exhaustion-073a73e05daf09d4.yaml b/releasenotes/notes/netapp_cdot_report_shared_blocks_exhaustion-073a73e05daf09d4.yaml new file mode 100644 index 00000000000..24536b388d5 --- /dev/null +++ b/releasenotes/notes/netapp_cdot_report_shared_blocks_exhaustion-073a73e05daf09d4.yaml @@ -0,0 +1,9 @@ +--- +features: + - The NetApp cDOT drivers report to the scheduler, + for each FlexVol pool, the percentage of the shared + block limit that has been consumed by dedupe and + cloning operations. This value, netapp_dedupe_used_percent, + may be used in the filter & goodness functions for better + placement of new Cinder volumes. +