RBD: Don't query Ceph on stats for exclusive pools

Collecting stats for provisioned_capacity_gb takes a long time, since we
have to query each individual image for its provisioned size.  If the
pool is used exclusively by Cinder, or we are willing to accept a
potential deviation in Cinder's stats, we can skip this query altogether
and calculate the value from the DB information for the volumes.

This patch adds a configuration option, `rbd_exclusive_cinder_pool`,
that allows us to disable the per-image size collection and thus speed
up stats reporting.
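
For example, an operator whose pool is Cinder-only could enable the
option in their backend section (illustrative snippet; the section name
and pool name below are placeholders, not part of this patch):

    [ceph-rbd]
    volume_driver = cinder.volume.drivers.rbd.RBDDriver
    rbd_pool = volumes
    rbd_exclusive_cinder_pool = true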

Change-Id: I32c7746fa9149bce6cdec96ee9aa87b303de4271
Closes-Bug: #1704106
Gorka Eguileor 2018-02-06 14:54:57 +01:00
parent 98018cb429
commit f33baccc35
3 changed files with 83 additions and 8 deletions

cinder/tests/unit/volume/drivers/test_rbd.py

@@ -141,6 +141,19 @@ CEPH_MON_DUMP = r"""dumped monmap epoch 1
 """


+class MockDriverConfig(object):
+    def __init__(self, **kwargs):
+        my_dict = vars(self)
+        my_dict.update(kwargs)
+        my_dict.setdefault('max_over_subscription_ratio', 1.0)
+        my_dict.setdefault('reserved_percentage', 0)
+        my_dict.setdefault('volume_backend_name', 'RBD')
+        my_dict.setdefault('_default', None)
+
+    def __call__(self, value):
+        return getattr(self, value, self._default)
+
+
 def mock_driver_configuration(value):
     if value == 'max_over_subscription_ratio':
         return 1.0
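
As an aside (illustration only, not part of the patch), the stub above is
callable so an instance can stand in for `configuration.safe_get`
directly; a quick sketch of its behavior:

    # Keyword overrides win, the setdefault values act as defaults, and
    # unknown options fall back to _default (None).
    cfg = MockDriverConfig(rbd_exclusive_cinder_pool=True)
    cfg('rbd_exclusive_cinder_pool')   # True  (explicit override)
    cfg('volume_backend_name')         # 'RBD' (default from __init__)
    cfg('anything_else')               # None  (getattr -> self._default)
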
@@ -1191,8 +1204,9 @@ class RBDTestCase(test.TestCase):
         expected['replication_targets'] = [t['backend_id'] for t in targets]
         expected['replication_targets'].append('default')

+        my_safe_get = MockDriverConfig(rbd_exclusive_cinder_pool=False)
         self.mock_object(self.driver.configuration, 'safe_get',
-                         mock_driver_configuration)
+                         my_safe_get)

         with mock.patch.object(self.driver, '_get_fsid') as mock_get_fsid:
             mock_get_fsid.return_value = expected_fsid
@@ -1202,9 +1216,46 @@ class RBDTestCase(test.TestCase):
     @common_mocks
     @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_usage_info')
     @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_pool_stats')
-    def test_update_volume_stats_error(self, stats_mock, usage_mock):
+    def test_update_volume_stats_exclusive_pool(self, stats_mock, usage_mock):
+        stats_mock.return_value = (mock.sentinel.free_capacity_gb,
+                                   mock.sentinel.total_capacity_gb)
+
+        expected_fsid = 'abc'
+        expected_location_info = ('nondefault:%s:%s:%s:rbd' %
+                                  (self.cfg.rbd_ceph_conf, expected_fsid,
+                                   self.cfg.rbd_user))
+        expected = dict(
+            volume_backend_name='RBD',
+            replication_enabled=False,
+            vendor_name='Open Source',
+            driver_version=self.driver.VERSION,
+            storage_protocol='ceph',
+            total_capacity_gb=mock.sentinel.total_capacity_gb,
+            free_capacity_gb=mock.sentinel.free_capacity_gb,
+            reserved_percentage=0,
+            thin_provisioning_support=True,
+            max_over_subscription_ratio=1.0,
+            multiattach=False,
+            location_info=expected_location_info)
+
+        my_safe_get = MockDriverConfig(rbd_exclusive_cinder_pool=True)
+        self.mock_object(self.driver.configuration, 'safe_get',
+                         my_safe_get)
+
+        with mock.patch.object(self.driver, '_get_fsid',
+                               return_value=expected_fsid):
+            actual = self.driver.get_volume_stats(True)
+
+        self.assertDictEqual(expected, actual)
+        usage_mock.assert_not_called()
+
+    @common_mocks
+    @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_usage_info')
+    @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_pool_stats')
+    def test_update_volume_stats_error(self, stats_mock, usage_mock):
+        my_safe_get = MockDriverConfig(rbd_exclusive_cinder_pool=False)
+        self.mock_object(self.driver.configuration, 'safe_get',
+                         my_safe_get)
+
         expected_fsid = 'abc'
         expected_location_info = ('nondefault:%s:%s:%s:rbd' %
@@ -1219,7 +1270,6 @@ class RBDTestCase(test.TestCase):
             free_capacity_gb='unknown',
             reserved_percentage=0,
             multiattach=False,
-            provisioned_capacity_gb=0,
             max_over_subscription_ratio=1.0,
             thin_provisioning_support=True,
             location_info=expected_location_info)

cinder/volume/drivers/rbd.py

@@ -103,6 +103,13 @@ RBD_OPTS = [
                     'dynamic value -used + current free- and to False to '
                     'report a static value -quota max bytes if defined and '
                     'global size of cluster if not-.'),
+    cfg.BoolOpt('rbd_exclusive_cinder_pool', default=False,
+                help="Set to True if the pool is used exclusively by Cinder. "
+                     "On exclusive use the driver won't query images' "
+                     "provisioned size, since it will match the value "
+                     "calculated by the Cinder core code for "
+                     "allocated_capacity_gb. This reduces the load on the "
+                     "Ceph cluster as well as on the volume service."),
 ]

 CONF = cfg.CONF
@@ -470,7 +477,6 @@ class RBDDriver(driver.CloneableImageVD,
             'storage_protocol': 'ceph',
             'total_capacity_gb': 'unknown',
             'free_capacity_gb': 'unknown',
-            'provisioned_capacity_gb': 0,
             'reserved_percentage': (
                 self.configuration.safe_get('reserved_percentage')),
             'multiattach': False,
@@ -492,10 +498,14 @@ class RBDDriver(driver.CloneableImageVD,
             stats['free_capacity_gb'] = free_capacity
             stats['total_capacity_gb'] = total_capacity

-            total_gbi = self._get_usage_info()
-            stats['provisioned_capacity_gb'] = total_gbi
+            # For exclusive pools let the scheduler set provisioned_capacity_gb
+            # to allocated_capacity_gb; for non-exclusive pools query the value.
+            if not self.configuration.safe_get('rbd_exclusive_cinder_pool'):
+                total_gbi = self._get_usage_info()
+                stats['provisioned_capacity_gb'] = total_gbi
         except self.rados.Error:
-            # just log and return unknown capacities
+            # just log and return unknown capacities and let the scheduler
+            # set provisioned_capacity_gb = allocated_capacity_gb
             LOG.exception('error refreshing volume stats')

         self._stats = stats
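
The comments in this hunk lean on scheduler behavior: when a backend's
stats omit provisioned_capacity_gb, the Cinder scheduler's host state
falls back to allocated_capacity_gb, which core code computes from the
volumes recorded in the Cinder DB. A simplified sketch of that fallback
(assumed scheduler behavior, not code from this patch):

    def effective_provisioned_capacity_gb(capabilities,
                                          allocated_capacity_gb):
        # Use the backend-reported value when present; otherwise fall
        # back to the DB-derived allocation, as the scheduler does.
        return capabilities.get('provisioned_capacity_gb',
                                allocated_capacity_gb)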


@@ -0,0 +1,15 @@
+---
+features:
+  - |
+    When using the RBD pool exclusively for Cinder we can now set
+    `rbd_exclusive_cinder_pool` to `true` and Cinder will use DB information
+    to calculate provisioned size instead of querying all volumes in the
+    backend, which will reduce the load on the Ceph cluster and the volume
+    service.
+issues:
+  - |
+    If RBD stats collection is taking too long in your environment, maybe
+    even leading to the service appearing as down, you'll want to use the
+    `rbd_exclusive_cinder_pool = true` configuration option if you are
+    using the pool exclusively for Cinder, and maybe even if you are not
+    and can live with the inaccuracy.