From 44254ca865515c2ecd91886f0100ada874a40abe Mon Sep 17 00:00:00 2001
From: melanie witt
Date: Tue, 27 Mar 2018 01:27:56 +0000
Subject: [PATCH] rbd: use MAX_AVAIL stat for reporting bytes available

Currently, the reporting of bytes available works well for recommended
Ceph deployments that run one OSD per disk [1]. However, for users who
are running multiple OSDs on a single disk, the current reporting will
reflect bytes available * number of replicas.

We can enhance the bytes available reporting method to accommodate
unrecommended Ceph deployments by using the MAX_AVAIL stat obtainable
via the 'ceph df' command. The MAX_AVAIL stat takes the number of
configured replicas into consideration and will reflect the correct
number of bytes available even when Ceph is deployed in a way the
documentation recommends against.

For most users, this change should make no difference. It will only
help users who are running unrecommended Ceph deployments.

[1] http://docs.ceph.com/docs/luminous/start/hardware-recommendations/#hard-disk-drives

Change-Id: I96faff6d3b9747514441d83c629fdd1cface1eb5
---
 .../unit/virt/libvirt/storage/test_rbd.py     | 64 +++++++++++++++++++
 nova/virt/libvirt/storage/rbd_utils.py        | 32 ++++++++--
 ...nhance-get-pool-info-14afc8eccab49dcf.yaml |  9 +++
 3 files changed, 99 insertions(+), 6 deletions(-)
 create mode 100644 releasenotes/notes/rbd-enhance-get-pool-info-14afc8eccab49dcf.yaml

diff --git a/nova/tests/unit/virt/libvirt/storage/test_rbd.py b/nova/tests/unit/virt/libvirt/storage/test_rbd.py
index f6629b79f183..f38830d0ee2a 100644
--- a/nova/tests/unit/virt/libvirt/storage/test_rbd.py
+++ b/nova/tests/unit/virt/libvirt/storage/test_rbd.py
@@ -13,6 +13,7 @@

 from eventlet import tpool
 import mock
+from oslo_serialization import jsonutils
 from oslo_utils.fixture import uuidsentinel as uuids

 from nova.compute import task_states
@@ -51,6 +52,53 @@ CEPH_MON_DUMP = """dumped monmap epoch 1
 """


+# max_avail stats are tweaked for testing
+CEPH_DF = """
+{
+    "stats": {
+        "total_bytes": 25757220864,
+        "total_used_bytes": 274190336,
+        "total_avail_bytes": 25483030528
+    },
+    "pools": [
+        {
+            "name": "images",
+            "id": 1,
+            "stats": {
+                "kb_used": 12419,
+                "bytes_used": 12716067,
+                "percent_used": 0.05,
+                "max_avail": 24195168123,
+                "objects": 6
+            }
+        },
+        {
+            "name": "rbd",
+            "id": 2,
+            "stats": {
+                "kb_used": 0,
+                "bytes_used": 0,
+                "percent_used": 0.00,
+                "max_avail": 24195168456,
+                "objects": 0
+            }
+        },
+        {
+            "name": "volumes",
+            "id": 3,
+            "stats": {
+                "kb_used": 0,
+                "bytes_used": 0,
+                "percent_used": 0.00,
+                "max_avail": 24195168789,
+                "objects": 0
+            }
+        }
+    ]
+}
+"""
+
+
 class FakeException(Exception):
     pass

@@ -557,3 +605,19 @@ class RbdTestCase(test.NoDBTestCase):
         proxy.list_snaps.return_value = [{'name': self.snap_name}, ]
         self.driver.rollback_to_snap(self.volume_name, self.snap_name)
         proxy.rollback_to_snap.assert_called_once_with(self.snap_name)
+
+    @mock.patch('oslo_concurrency.processutils.execute')
+    def test_get_pool_info(self, mock_execute):
+        mock_execute.return_value = (CEPH_DF, '')
+        ceph_df_json = jsonutils.loads(CEPH_DF)
+        expected = {'total': ceph_df_json['stats']['total_bytes'],
+                    'free': ceph_df_json['pools'][1]['stats']['max_avail'],
+                    'used': ceph_df_json['pools'][1]['stats']['bytes_used']}
+        self.assertDictEqual(expected, self.driver.get_pool_info())
+
+    @mock.patch('oslo_concurrency.processutils.execute')
+    def test_get_pool_info_not_found(self, mock_execute):
+        # Make the pool something other than self.rbd_pool so it won't be found
+        ceph_df_not_found = CEPH_DF.replace('rbd', 'vms')
+        mock_execute.return_value = (ceph_df_not_found, '')
+        self.assertRaises(exception.NotFound, self.driver.get_pool_info)
diff --git a/nova/virt/libvirt/storage/rbd_utils.py b/nova/virt/libvirt/storage/rbd_utils.py
index 133f72f410ec..208bc9d86489 100644
--- a/nova/virt/libvirt/storage/rbd_utils.py
+++ b/nova/virt/libvirt/storage/rbd_utils.py
@@ -30,7 +30,6 @@ from oslo_serialization import jsonutils
 from oslo_service import loopingcall
 from oslo_utils import encodeutils
 from oslo_utils import excutils
-from oslo_utils import units

 from nova import exception
 from nova.i18n import _
@@ -366,11 +365,32 @@ class RBDDriver(object):
             self._destroy_volume(client, volume)

     def get_pool_info(self):
-        with RADOSClient(self) as client:
-            stats = client.cluster.get_cluster_stats()
-            return {'total': stats['kb'] * units.Ki,
-                    'free': stats['kb_avail'] * units.Ki,
-                    'used': stats['kb_used'] * units.Ki}
+        # NOTE(melwitt): We're executing 'ceph df' here instead of calling
+        # the RADOSClient.get_cluster_stats python API because we need
+        # access to the MAX_AVAIL stat, which reports the available bytes
+        # taking replication into consideration. The global available stat
+        # from the RADOSClient.get_cluster_stats python API does not take
+        # replication size into consideration and will simply return the
+        # available storage per OSD, added together across all OSDs. The
+        # MAX_AVAIL stat will divide by the replication size when doing
+        # the calculation.
+        args = ['ceph', 'df', '--format=json'] + self.ceph_args()
+        out, _ = processutils.execute(*args)
+        stats = jsonutils.loads(out)
+
+        # Find the pool for which we are configured.
+        pool_stats = None
+        for pool in stats['pools']:
+            if pool['name'] == self.pool:
+                pool_stats = pool['stats']
+                break
+
+        if pool_stats is None:
+            raise exception.NotFound('Pool %s could not be found.' % self.pool)
+
+        return {'total': stats['stats']['total_bytes'],
+                'free': pool_stats['max_avail'],
+                'used': pool_stats['bytes_used']}

     def create_snap(self, volume, name, pool=None, protect=False):
         """Create a snapshot of an RBD volume.
diff --git a/releasenotes/notes/rbd-enhance-get-pool-info-14afc8eccab49dcf.yaml b/releasenotes/notes/rbd-enhance-get-pool-info-14afc8eccab49dcf.yaml
new file mode 100644
index 000000000000..4e4352e4bc06
--- /dev/null
+++ b/releasenotes/notes/rbd-enhance-get-pool-info-14afc8eccab49dcf.yaml
@@ -0,0 +1,9 @@
+---
+other:
+  - |
+    The reporting for bytes available for RBD has been enhanced to
+    accommodate `unrecommended
+    <http://docs.ceph.com/docs/luminous/start/hardware-recommendations/#hard-disk-drives>`_
+    Ceph deployments where multiple OSDs are running on a single disk. The
+    new reporting method takes the number of configured replicas into
+    consideration when reporting bytes available.
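
Reviewer note (not part of the patch): a minimal standalone sketch of the
arithmetic this change corrects, per the NOTE in get_pool_info above. The
replication size of 3, the pool name 'rbd', and the trimmed JSON shape are
assumptions for illustration; the numbers follow the CEPH_DF fixture style,
with max_avail set to total_avail_bytes divided by the assumed replica count.

    import json

    # Trimmed-down 'ceph df --format=json' output. The figures assume 3x
    # replication, so max_avail ~= total_avail_bytes / 3.
    CEPH_DF_SAMPLE = """
    {
        "stats": {"total_bytes": 25757220864,
                  "total_used_bytes": 274190336,
                  "total_avail_bytes": 25483030528},
        "pools": [{"name": "rbd",
                   "stats": {"bytes_used": 0, "max_avail": 8494343509}}]
    }
    """

    stats = json.loads(CEPH_DF_SAMPLE)

    # Old method: cluster-wide available bytes, which sums the free space
    # of every OSD and therefore counts the space each replica of the same
    # data will consume.
    old_free = stats['stats']['total_avail_bytes']

    # New method: the pool's MAX_AVAIL, which Ceph reports with the pool's
    # replication size already divided out.
    pool = next(p for p in stats['pools'] if p['name'] == 'rbd')
    new_free = pool['stats']['max_avail']

    print('old free: %d bytes' % old_free)  # 25483030528 (~3x over-report)
    print('new free: %d bytes' % new_free)  # 8494343509

The old figure answers "how many raw bytes are free across the cluster";
MAX_AVAIL answers "how many more bytes can this pool actually store", which
is the quantity the bytes-available reporting is meant to provide.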