NetApp: Report shared blocks exhaustion

The maximum amount of shared (deduplicated, cloned) data on
a Data ONTAP FlexVol (i.e. a Cinder pool) is 640TB.  The only thing
more surprising about that number is that we have customers hitting
it. The symptom is that operations such as cloning Cinder volumes
fail because no more blocks may be shared. The fix is to report the
level of consumption to the scheduler for optional incorporation
into the filter & goodness functions, so that pools nearing the shared
block limit may be shielded from further provisioning requests.

Implements: blueprint netapp-cdot-report-shared-blocks-exhaustion
Change-Id: I01b7322f7ddb05ee5e28bcb1121a90a6ea307720
This commit is contained in:
Clinton Knight 2016-08-28 22:12:25 -04:00
parent e6642d97fd
commit 685e4c98ee
8 changed files with 227 additions and 8 deletions

View File

@ -800,6 +800,8 @@ SIS_GET_ITER_SSC_RESPONSE = etree.XML("""
<sis-status-info> <sis-status-info>
<is-compression-enabled>false</is-compression-enabled> <is-compression-enabled>false</is-compression-enabled>
<state>enabled</state> <state>enabled</state>
<logical-data-size>211106232532992</logical-data-size>
<logical-data-limit>703687441776640</logical-data-limit>
</sis-status-info> </sis-status-info>
</attributes-list> </attributes-list>
<num-records>1</num-records> <num-records>1</num-records>
@ -809,8 +811,50 @@ SIS_GET_ITER_SSC_RESPONSE = etree.XML("""
VOLUME_DEDUPE_INFO_SSC = { VOLUME_DEDUPE_INFO_SSC = {
'compression': False, 'compression': False,
'dedupe': True, 'dedupe': True,
'logical-data-size': 211106232532992,
'logical-data-limit': 703687441776640,
} }
# Canned 'sis-get-iter' reply for a single volume whose dedupe state is
# 'disabled' and which carries no logical-data-size / logical-data-limit
# elements at all.
SIS_GET_ITER_SSC_NO_LOGICAL_DATA_RESPONSE = etree.XML("""
<results status="passed">
<attributes-list>
<sis-status-info>
<is-compression-enabled>false</is-compression-enabled>
<state>disabled</state>
</sis-status-info>
</attributes-list>
<num-records>1</num-records>
</results>
""")
# Dedupe info the client tests expect when the logical-data values are
# unavailable: size falls back to 0 and limit falls back to 1.
VOLUME_DEDUPE_INFO_SSC_NO_LOGICAL_DATA = {
'compression': False,
'dedupe': False,
'logical-data-size': 0,
'logical-data-limit': 1,
}
# Canned 'clone-split-status' reply carrying both unsplit clone counters.
CLONE_SPLIT_STATUS_RESPONSE = etree.XML("""
<results status="passed">
<clone-split-info>
<unsplit-clone-count>1234</unsplit-clone-count>
<unsplit-size>316659348799488</unsplit-size>
</clone-split-info>
</results>
""")
# Integer form of the counters in CLONE_SPLIT_STATUS_RESPONSE, as the client
# tests expect get_clone_split_info() to return them.
VOLUME_CLONE_SPLIT_STATUS = {
'unsplit-size': 316659348799488,
'unsplit-clone-count': 1234,
}
# Canned 'clone-split-status' reply whose clone-split-info element is empty.
CLONE_SPLIT_STATUS_NO_DATA_RESPONSE = etree.XML("""
<results status="passed">
<clone-split-info>
</clone-split-info>
</results>
""")
STORAGE_DISK_GET_ITER_RESPONSE_PAGE_1 = etree.XML(""" STORAGE_DISK_GET_ITER_RESPONSE_PAGE_1 = etree.XML("""
<results status="passed"> <results status="passed">
<attributes-list> <attributes-list>

View File

@ -1638,6 +1638,8 @@ class NetAppCmodeClientTestCase(test.TestCase):
'sis-status-info': { 'sis-status-info': {
'state': None, 'state': None,
'is-compression-enabled': None, 'is-compression-enabled': None,
'logical-data-size': None,
'logical-data-limit': None,
}, },
}, },
} }
@ -1645,6 +1647,20 @@ class NetAppCmodeClientTestCase(test.TestCase):
'sis-get-iter', sis_get_iter_args) 'sis-get-iter', sis_get_iter_args)
self.assertEqual(fake_client.VOLUME_DEDUPE_INFO_SSC, result) self.assertEqual(fake_client.VOLUME_DEDUPE_INFO_SSC, result)
def test_get_flexvol_dedupe_info_no_logical_data_values(self):
    """A reply without logical-data fields yields size 0 and limit 1."""

    fake_response = netapp_api.NaElement(
        fake_client.SIS_GET_ITER_SSC_NO_LOGICAL_DATA_RESPONSE)
    self.mock_object(
        self.client, 'send_iter_request',
        mock.Mock(return_value=fake_response))

    dedupe_info = self.client.get_flexvol_dedupe_info(
        fake_client.VOLUME_NAMES[0])

    self.assertEqual(
        fake_client.VOLUME_DEDUPE_INFO_SSC_NO_LOGICAL_DATA, dedupe_info)
def test_get_flexvol_dedupe_info_not_found(self): def test_get_flexvol_dedupe_info_not_found(self):
api_response = netapp_api.NaElement( api_response = netapp_api.NaElement(
@ -1656,8 +1672,8 @@ class NetAppCmodeClientTestCase(test.TestCase):
result = self.client.get_flexvol_dedupe_info( result = self.client.get_flexvol_dedupe_info(
fake_client.VOLUME_NAMES[0]) fake_client.VOLUME_NAMES[0])
expected = {'compression': False, 'dedupe': False} self.assertEqual(fake_client.VOLUME_DEDUPE_INFO_SSC_NO_LOGICAL_DATA,
self.assertEqual(expected, result) result)
def test_get_flexvol_dedupe_info_api_error(self): def test_get_flexvol_dedupe_info_api_error(self):
@ -1668,7 +1684,81 @@ class NetAppCmodeClientTestCase(test.TestCase):
result = self.client.get_flexvol_dedupe_info( result = self.client.get_flexvol_dedupe_info(
fake_client.VOLUME_NAMES[0]) fake_client.VOLUME_NAMES[0])
expected = {'compression': False, 'dedupe': False} self.assertEqual(fake_client.VOLUME_DEDUPE_INFO_SSC_NO_LOGICAL_DATA,
result)
def test_get_flexvol_dedupe_used_percent(self):
    """With the feature enabled, the fake dedupe/clone data yields 75%."""

    volume_name = fake_client.VOLUME_NAMES[0]
    self.client.features.add_feature('CLONE_SPLIT_STATUS')
    dedupe_info_mock = self.mock_object(
        self.client, 'get_flexvol_dedupe_info',
        mock.Mock(return_value=fake_client.VOLUME_DEDUPE_INFO_SSC))
    clone_split_mock = self.mock_object(
        self.client, 'get_clone_split_info',
        mock.Mock(return_value=fake_client.VOLUME_CLONE_SPLIT_STATUS))

    used_percent = self.client.get_flexvol_dedupe_used_percent(volume_name)

    self.assertEqual(75.0, used_percent)
    # Both underlying lookups must be made exactly once for this volume.
    dedupe_info_mock.assert_called_once_with(volume_name)
    clone_split_mock.assert_called_once_with(volume_name)
def test_get_flexvol_dedupe_used_percent_not_supported(self):
    """Without CLONE_SPLIT_STATUS the result is 0.0 and no API is queried."""

    volume_name = fake_client.VOLUME_NAMES[0]
    self.client.features.add_feature('CLONE_SPLIT_STATUS', supported=False)
    dedupe_info_mock = self.mock_object(
        self.client, 'get_flexvol_dedupe_info',
        mock.Mock(return_value=fake_client.VOLUME_DEDUPE_INFO_SSC))
    clone_split_mock = self.mock_object(
        self.client, 'get_clone_split_info',
        mock.Mock(return_value=fake_client.VOLUME_CLONE_SPLIT_STATUS))

    used_percent = self.client.get_flexvol_dedupe_used_percent(volume_name)

    self.assertEqual(0.0, used_percent)
    self.assertFalse(dedupe_info_mock.called)
    self.assertFalse(clone_split_mock.called)
def test_get_clone_split_info(self):
    """A populated clone-split-status reply is parsed into integers."""

    volume_name = fake_client.VOLUME_NAMES[0]
    fake_response = netapp_api.NaElement(
        fake_client.CLONE_SPLIT_STATUS_RESPONSE)
    self.mock_object(
        self.client, 'send_request',
        mock.Mock(return_value=fake_response))

    split_info = self.client.get_clone_split_info(volume_name)

    self.assertEqual(fake_client.VOLUME_CLONE_SPLIT_STATUS, split_info)
    self.client.send_request.assert_called_once_with(
        'clone-split-status', {'volume-name': volume_name})
def test_get_clone_split_info_api_error(self):
    """An API error from send_request is reported as zeroed counters."""

    self.mock_object(
        self.client, 'send_request',
        mock.Mock(side_effect=self._mock_api_error()))

    split_info = self.client.get_clone_split_info(
        fake_client.VOLUME_NAMES[0])

    self.assertEqual(
        {'unsplit-size': 0, 'unsplit-clone-count': 0}, split_info)
def test_get_clone_split_info_no_data(self):
    """An empty clone-split-info element is reported as zeroed counters."""

    fake_response = netapp_api.NaElement(
        fake_client.CLONE_SPLIT_STATUS_NO_DATA_RESPONSE)
    self.mock_object(
        self.client, 'send_request',
        mock.Mock(return_value=fake_response))

    split_info = self.client.get_clone_split_info(
        fake_client.VOLUME_NAMES[0])

    self.assertEqual(
        {'unsplit-size': 0, 'unsplit-clone-count': 0}, split_info)
def test_is_flexvol_mirrored(self): def test_is_flexvol_mirrored(self):

View File

@ -373,9 +373,12 @@ class NetAppBlockStorageCmodeLibraryTestCase(test.TestCase):
'size-total': 10737418240.0, 'size-total': 10737418240.0,
'size-available': 2147483648.0, 'size-available': 2147483648.0,
} }
self.mock_object( self.mock_object(self.zapi_client,
self.zapi_client, 'get_flexvol_capacity', 'get_flexvol_capacity',
mock.Mock(return_value=mock_capacities)) mock.Mock(return_value=mock_capacities))
self.mock_object(self.zapi_client,
'get_flexvol_dedupe_used_percent',
mock.Mock(return_value=55.0))
aggr_capacities = { aggr_capacities = {
'aggr1': { 'aggr1': {
@ -401,6 +404,7 @@ class NetAppBlockStorageCmodeLibraryTestCase(test.TestCase):
'total_capacity_gb': 10.0, 'total_capacity_gb': 10.0,
'free_capacity_gb': 2.0, 'free_capacity_gb': 2.0,
'provisioned_capacity_gb': 8.0, 'provisioned_capacity_gb': 8.0,
'netapp_dedupe_used_percent': 55.0,
'netapp_aggregate_used_percent': 45, 'netapp_aggregate_used_percent': 45,
'utilization': 30.0, 'utilization': 30.0,
'filter_function': 'filter', 'filter_function': 'filter',

View File

@ -183,6 +183,9 @@ class NetAppCmodeNfsDriverTestCase(test.TestCase):
self.mock_object(self.driver, self.mock_object(self.driver,
'_get_share_capacity_info', '_get_share_capacity_info',
mock.Mock(return_value=capacity)) mock.Mock(return_value=capacity))
self.mock_object(self.driver.zapi_client,
'get_flexvol_dedupe_used_percent',
mock.Mock(return_value=55.0))
aggr_capacities = { aggr_capacities = {
'aggr1': { 'aggr1': {
@ -210,6 +213,7 @@ class NetAppCmodeNfsDriverTestCase(test.TestCase):
'total_capacity_gb': total_capacity_gb, 'total_capacity_gb': total_capacity_gb,
'free_capacity_gb': free_capacity_gb, 'free_capacity_gb': free_capacity_gb,
'provisioned_capacity_gb': provisioned_capacity_gb, 'provisioned_capacity_gb': provisioned_capacity_gb,
'netapp_dedupe_used_percent': 55.0,
'netapp_aggregate_used_percent': 45, 'netapp_aggregate_used_percent': 45,
'utilization': 30.0, 'utilization': 30.0,
'filter_function': 'filter', 'filter_function': 'filter',

View File

@ -290,6 +290,11 @@ class NetAppBlockStorageCmodeLibrary(block_base.NetAppBlockStorageLibrary,
pool['provisioned_capacity_gb'] = round( pool['provisioned_capacity_gb'] = round(
pool['total_capacity_gb'] - pool['free_capacity_gb'], 2) pool['total_capacity_gb'] - pool['free_capacity_gb'], 2)
dedupe_used = self.zapi_client.get_flexvol_dedupe_used_percent(
ssc_vol_name)
pool['netapp_dedupe_used_percent'] = na_utils.round_down(
dedupe_used)
aggregate_name = ssc_vol_info.get('netapp_aggregate') aggregate_name = ssc_vol_info.get('netapp_aggregate')
aggr_capacity = aggr_capacities.get(aggregate_name, {}) aggr_capacity = aggr_capacities.get(aggregate_name, {})
pool['netapp_aggregate_used_percent'] = aggr_capacity.get( pool['netapp_aggregate_used_percent'] = aggr_capacity.get(

View File

@ -66,6 +66,7 @@ class Client(client_base.Client):
self.features.add_feature('USER_CAPABILITY_LIST', self.features.add_feature('USER_CAPABILITY_LIST',
supported=ontapi_1_20) supported=ontapi_1_20)
self.features.add_feature('SYSTEM_METRICS', supported=ontapi_1_2x) self.features.add_feature('SYSTEM_METRICS', supported=ontapi_1_2x)
self.features.add_feature('CLONE_SPLIT_STATUS', supported=ontapi_1_30)
self.features.add_feature('FAST_CLONE_DELETE', supported=ontapi_1_30) self.features.add_feature('FAST_CLONE_DELETE', supported=ontapi_1_30)
self.features.add_feature('SYSTEM_CONSTITUENT_METRICS', self.features.add_feature('SYSTEM_CONSTITUENT_METRICS',
supported=ontapi_1_30) supported=ontapi_1_30)
@ -985,19 +986,28 @@ class Client(client_base.Client):
'sis-status-info': { 'sis-status-info': {
'state': None, 'state': None,
'is-compression-enabled': None, 'is-compression-enabled': None,
'logical-data-size': None,
'logical-data-limit': None,
}, },
}, },
} }
no_dedupe_response = {
'compression': False,
'dedupe': False,
'logical-data-size': 0,
'logical-data-limit': 1,
}
try: try:
result = self.send_iter_request('sis-get-iter', api_args) result = self.send_iter_request('sis-get-iter', api_args)
except netapp_api.NaApiError: except netapp_api.NaApiError:
msg = _LE('Failed to get dedupe info for volume %s.') msg = _LE('Failed to get dedupe info for volume %s.')
LOG.exception(msg, flexvol_name) LOG.exception(msg, flexvol_name)
return {'compression': False, 'dedupe': False} return no_dedupe_response
if self._get_record_count(result) != 1: if self._get_record_count(result) != 1:
return {'compression': False, 'dedupe': False} return no_dedupe_response
attributes_list = result.get_child_by_name( attributes_list = result.get_child_by_name(
'attributes-list') or netapp_api.NaElement('none') 'attributes-list') or netapp_api.NaElement('none')
@ -1005,15 +1015,63 @@ class Client(client_base.Client):
sis_status_info = attributes_list.get_child_by_name( sis_status_info = attributes_list.get_child_by_name(
'sis-status-info') or netapp_api.NaElement('none') 'sis-status-info') or netapp_api.NaElement('none')
logical_data_size = sis_status_info.get_child_content(
'logical-data-size') or 0
logical_data_limit = sis_status_info.get_child_content(
'logical-data-limit') or 1
sis = { sis = {
'compression': strutils.bool_from_string( 'compression': strutils.bool_from_string(
sis_status_info.get_child_content('is-compression-enabled')), sis_status_info.get_child_content('is-compression-enabled')),
'dedupe': na_utils.to_bool( 'dedupe': na_utils.to_bool(
sis_status_info.get_child_content('state')), sis_status_info.get_child_content('state')),
'logical-data-size': int(logical_data_size),
'logical-data-limit': int(logical_data_limit),
} }
return sis return sis
def get_flexvol_dedupe_used_percent(self, flexvol_name):
    """Determine how close a flexvol is to its shared block limit.

    :param flexvol_name: name of the flexvol to inspect
    :returns: float percentage of the shared block limit consumed,
        computed as 100 * (logical-data-size + unsplit-size) /
        logical-data-limit; 0.0 when the CLONE_SPLIT_STATUS feature is
        not supported or when no positive limit is available
    """

    # Note(cknight): The value returned by this method is computed from
    # values returned by two different APIs, one of which was new in
    # Data ONTAP 8.3.
    if not self.features.CLONE_SPLIT_STATUS:
        return 0.0

    dedupe_info = self.get_flexvol_dedupe_info(flexvol_name)
    clone_split_info = self.get_clone_split_info(flexvol_name)

    # A zero or missing limit would otherwise raise ZeroDivisionError or
    # TypeError and abort stats reporting; treat it as "no consumption
    # known", the same answer given when the feature is unsupported.
    logical_data_limit = dedupe_info.get('logical-data-limit') or 0
    if logical_data_limit <= 0:
        return 0.0

    total_dedupe_blocks = ((dedupe_info.get('logical-data-size') or 0) +
                           (clone_split_info.get('unsplit-size') or 0))

    return 100.0 * float(total_dedupe_blocks) / logical_data_limit
def get_clone_split_info(self, flexvol_name):
    """Report the unsplit file/LUN clone size and count for a flexvol.

    :param flexvol_name: name of the flexvol to query
    :returns: dict with integer 'unsplit-size' and 'unsplit-clone-count';
        both are 0 when the request fails or the reply omits them
    """

    request_args = {'volume-name': flexvol_name}
    try:
        response = self.send_request('clone-split-status', request_args)
    except netapp_api.NaApiError:
        # Best effort: log the failure and report no unsplit clones.
        msg = _LE('Failed to get clone split info for volume %s.')
        LOG.exception(msg, flexvol_name)
        return {'unsplit-size': 0, 'unsplit-clone-count': 0}

    info = response.get_child_by_name('clone-split-info')
    if not info:
        # Substitute an empty element so the lookups below return nothing.
        info = netapp_api.NaElement('none')

    raw_size = info.get_child_content('unsplit-size')
    raw_count = info.get_child_content('unsplit-clone-count')

    return {
        'unsplit-size': int(raw_size or 0),
        'unsplit-clone-count': int(raw_count or 0),
    }
def is_flexvol_mirrored(self, flexvol_name, vserver_name): def is_flexvol_mirrored(self, flexvol_name, vserver_name):
"""Check if flexvol is a SnapMirror source.""" """Check if flexvol is a SnapMirror source."""

View File

@ -246,6 +246,11 @@ class NetAppCmodeNfsDriver(nfs_base.NetAppNfsDriver,
capacity = self._get_share_capacity_info(nfs_share) capacity = self._get_share_capacity_info(nfs_share)
pool.update(capacity) pool.update(capacity)
dedupe_used = self.zapi_client.get_flexvol_dedupe_used_percent(
ssc_vol_name)
pool['netapp_dedupe_used_percent'] = na_utils.round_down(
dedupe_used)
aggregate_name = ssc_vol_info.get('netapp_aggregate') aggregate_name = ssc_vol_info.get('netapp_aggregate')
aggr_capacity = aggr_capacities.get(aggregate_name, {}) aggr_capacity = aggr_capacities.get(aggregate_name, {})
pool['netapp_aggregate_used_percent'] = aggr_capacity.get( pool['netapp_aggregate_used_percent'] = aggr_capacity.get(

View File

@ -0,0 +1,9 @@
---
features:
  - The NetApp cDOT drivers report to the scheduler,
    for each FlexVol pool, the percentage of the shared
    block limit that has been consumed by dedupe and
    cloning operations. This value, netapp_dedupe_used_percent,
    may be used in the filter & goodness functions for better
    placement of new Cinder volumes.