From f33baccc3544cbda6cd5908328a56096046657ed Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Tue, 6 Feb 2018 14:54:57 +0100 Subject: [PATCH] RBD: Don't query Ceph on stats for exclusive pools Collecting stats for provisioned_capacity_gb takes a long time since we have to query each individual image for the provisioned size. If we are using the pool just for Cinder and/or are willing to accept a potential deviation in Cinder stats we could just not retrieve this information and calculate this based on the DB information for the volumes. This patch adds configuration option `rbd_exclusive_cinder_pool` that allows us to disable the size collection and thus improve the stats reporting speed. Change-Id: I32c7746fa9149bce6cdec96ee9aa87b303de4271 Closes-Bug: #1704106 --- cinder/tests/unit/volume/drivers/test_rbd.py | 58 +++++++++++++++++-- cinder/volume/drivers/rbd.py | 18 ++++-- ...e-rbd-exclusive-pool-a9bdebdeb1f0bf37.yaml | 15 +++++ 3 files changed, 83 insertions(+), 8 deletions(-) create mode 100644 releasenotes/notes/feature-rbd-exclusive-pool-a9bdebdeb1f0bf37.yaml diff --git a/cinder/tests/unit/volume/drivers/test_rbd.py b/cinder/tests/unit/volume/drivers/test_rbd.py index fc4ff530511..168ca233f43 100644 --- a/cinder/tests/unit/volume/drivers/test_rbd.py +++ b/cinder/tests/unit/volume/drivers/test_rbd.py @@ -141,6 +141,19 @@ CEPH_MON_DUMP = r"""dumped monmap epoch 1 """ +class MockDriverConfig(object): + def __init__(self, **kwargs): + my_dict = vars(self) + my_dict.update(kwargs) + my_dict.setdefault('max_over_subscription_ratio', 1.0) + my_dict.setdefault('reserved_percentage', 0) + my_dict.setdefault('volume_backend_name', 'RBD') + my_dict.setdefault('_default', None) + + def __call__(self, value): + return getattr(self, value, self._default) + + def mock_driver_configuration(value): if value == 'max_over_subscription_ratio': return 1.0 @@ -1191,8 +1204,9 @@ class RBDTestCase(test.TestCase): expected['replication_targets'] = [t['backend_id']for t in targets] expected['replication_targets'].append('default') + my_safe_get = MockDriverConfig(rbd_exclusive_cinder_pool=False) self.mock_object(self.driver.configuration, 'safe_get', - mock_driver_configuration) + my_safe_get) with mock.patch.object(self.driver, '_get_fsid') as mock_get_fsid: mock_get_fsid.return_value = expected_fsid @@ -1202,9 +1216,46 @@ class RBDTestCase(test.TestCase): @common_mocks @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_usage_info') @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_pool_stats') - def test_update_volume_stats_error(self, stats_mock, usage_mock): + def test_update_volume_stats_exclusive_pool(self, stats_mock, usage_mock): + stats_mock.return_value = (mock.sentinel.free_capacity_gb, + mock.sentinel.total_capacity_gb) + + expected_fsid = 'abc' + expected_location_info = ('nondefault:%s:%s:%s:rbd' % + (self.cfg.rbd_ceph_conf, expected_fsid, + self.cfg.rbd_user)) + expected = dict( + volume_backend_name='RBD', + replication_enabled=False, + vendor_name='Open Source', + driver_version=self.driver.VERSION, + storage_protocol='ceph', + total_capacity_gb=mock.sentinel.total_capacity_gb, + free_capacity_gb=mock.sentinel.free_capacity_gb, + reserved_percentage=0, + thin_provisioning_support=True, + max_over_subscription_ratio=1.0, + multiattach=False, + location_info=expected_location_info) + + my_safe_get = MockDriverConfig(rbd_exclusive_cinder_pool=True) self.mock_object(self.driver.configuration, 'safe_get', - mock_driver_configuration) + my_safe_get) + + with mock.patch.object(self.driver, 
'_get_fsid', + return_value=expected_fsid): + actual = self.driver.get_volume_stats(True) + + self.assertDictEqual(expected, actual) + usage_mock.assert_not_called() + + @common_mocks + @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_usage_info') + @mock.patch('cinder.volume.drivers.rbd.RBDDriver._get_pool_stats') + def test_update_volume_stats_error(self, stats_mock, usage_mock): + my_safe_get = MockDriverConfig(rbd_exclusive_cinder_pool=False) + self.mock_object(self.driver.configuration, 'safe_get', + my_safe_get) expected_fsid = 'abc' expected_location_info = ('nondefault:%s:%s:%s:rbd' % @@ -1219,7 +1270,6 @@ class RBDTestCase(test.TestCase): free_capacity_gb='unknown', reserved_percentage=0, multiattach=False, - provisioned_capacity_gb=0, max_over_subscription_ratio=1.0, thin_provisioning_support=True, location_info=expected_location_info) diff --git a/cinder/volume/drivers/rbd.py b/cinder/volume/drivers/rbd.py index 885cf803198..371adbee9c7 100644 --- a/cinder/volume/drivers/rbd.py +++ b/cinder/volume/drivers/rbd.py @@ -103,6 +103,13 @@ RBD_OPTS = [ 'dynamic value -used + current free- and to False to ' 'report a static value -quota max bytes if defined and ' 'global size of cluster if not-.'), + cfg.BoolOpt('rbd_exclusive_cinder_pool', default=False, + help="Set to True if the pool is used exclusively by Cinder. " + "On exclusive use driver won't query images' provisioned " + "size as they will match the value calculated by the " + "Cinder core code for allocated_capacity_gb. This " + "reduces the load on the Ceph cluster as well as on the " + "volume service."), ] CONF = cfg.CONF @@ -470,7 +477,6 @@ class RBDDriver(driver.CloneableImageVD, 'storage_protocol': 'ceph', 'total_capacity_gb': 'unknown', 'free_capacity_gb': 'unknown', - 'provisioned_capacity_gb': 0, 'reserved_percentage': ( self.configuration.safe_get('reserved_percentage')), 'multiattach': False, @@ -492,10 +498,14 @@ class RBDDriver(driver.CloneableImageVD, stats['free_capacity_gb'] = free_capacity stats['total_capacity_gb'] = total_capacity - total_gbi = self._get_usage_info() - stats['provisioned_capacity_gb'] = total_gbi + # For exclusive pools let scheduler set provisioned_capacity_gb to + # allocated_capacity_gb, and for non exclusive query the value. + if not self.configuration.safe_get('rbd_exclusive_cinder_pool'): + total_gbi = self._get_usage_info() + stats['provisioned_capacity_gb'] = total_gbi except self.rados.Error: - # just log and return unknown capacities + # just log and return unknown capacities and let scheduler set + # provisioned_capacity_gb = allocated_capacity_gb LOG.exception('error refreshing volume stats') self._stats = stats diff --git a/releasenotes/notes/feature-rbd-exclusive-pool-a9bdebdeb1f0bf37.yaml b/releasenotes/notes/feature-rbd-exclusive-pool-a9bdebdeb1f0bf37.yaml new file mode 100644 index 00000000000..6d32802c7ff --- /dev/null +++ b/releasenotes/notes/feature-rbd-exclusive-pool-a9bdebdeb1f0bf37.yaml @@ -0,0 +1,15 @@ +--- +features: + - | + When using the RBD pool exclusively for Cinder we can now set + `rbd_exclusive_cinder_pool` to `true` and Cinder will use DB information + to calculate provisioned size instead of querying all volumes in the + backend, which will reduce the load on the Ceph cluster and the volume + service. 
+issues:
+  - |
+    If RBD stats collection is taking too long in your environment, maybe
+    even leading to the service appearing as down, you'll want to set the
+    `rbd_exclusive_cinder_pool = true` configuration option if you are using
+    the pool exclusively for Cinder, and maybe even if you are not and can
+    live with the inaccuracy.
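
As a usage sketch, the new option would be enabled per RBD backend in
cinder.conf; the `[ceph]` section name and the `rbd_pool` / `rbd_ceph_conf`
values below are illustrative and not part of this change:

    [ceph]
    volume_driver = cinder.volume.drivers.rbd.RBDDriver
    rbd_pool = volumes
    rbd_ceph_conf = /etc/ceph/ceph.conf
    # Assumption: this pool holds only Cinder volumes, so skipping the
    # per-image provisioned-size query is safe.
    rbd_exclusive_cinder_pool = true

With this set, the driver no longer reports provisioned_capacity_gb and the
scheduler falls back to the allocated_capacity_gb value derived from the
Cinder database, as described in the commit message above.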