rbd: use MAX_AVAIL stat for reporting bytes available

Currently, the reporting of bytes available works well for recommended
Ceph deployments that run one OSD per disk [1]. However, for users who
are running multiple OSDs on a single disk, the current reporting will
reflect bytes available * number of replicas.

We can enhance the bytes available reporting method to accommodate
unrecommended Ceph deployments by using the MAX_AVAIL stat obtainable
via the 'ceph df' command. The MAX_AVAIL stat takes the number of
configured replicas into consideration and will reflect the correct
number of bytes available even when Ceph is deployed in a way the
documentation recommends against.
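
For example (illustrative numbers only, not taken from a real cluster):
with three-way replication, the raw free space is three times what a
pool can actually accept, and MAX_AVAIL divides that factor out:

    # Hypothetical pool with replication size 3 and 30 GiB of raw space free
    total_avail_bytes = 30 * 1024 ** 3  # raw free bytes summed over all OSDs
    replica_size = 3
    # The old reporting returned the raw figure, overstating capacity by the
    # replication factor; MAX_AVAIL divides it out instead:
    max_avail = total_avail_bytes // replica_size  # ~10 GiB actually writable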

For most users, this change should make no difference. It will only
help users who are running unrecommended Ceph deployments.

[1] http://docs.ceph.com/docs/luminous/start/hardware-recommendations/#hard-disk-drives

Change-Id: I96faff6d3b9747514441d83c629fdd1cface1eb5
melanie witt 2018-03-27 01:27:56 +00:00
parent 155da8f71f
commit 44254ca865
3 changed files with 99 additions and 6 deletions


@@ -13,6 +13,7 @@
 from eventlet import tpool
 import mock

+from oslo_serialization import jsonutils
 from oslo_utils.fixture import uuidsentinel as uuids

 from nova.compute import task_states
@@ -51,6 +52,53 @@ CEPH_MON_DUMP = """dumped monmap epoch 1
 """

+# max_avail stats are tweaked for testing
+CEPH_DF = """
+{
+    "stats": {
+        "total_bytes": 25757220864,
+        "total_used_bytes": 274190336,
+        "total_avail_bytes": 25483030528
+    },
+    "pools": [
+        {
+            "name": "images",
+            "id": 1,
+            "stats": {
+                "kb_used": 12419,
+                "bytes_used": 12716067,
+                "percent_used": 0.05,
+                "max_avail": 24195168123,
+                "objects": 6
+            }
+        },
+        {
+            "name": "rbd",
+            "id": 2,
+            "stats": {
+                "kb_used": 0,
+                "bytes_used": 0,
+                "percent_used": 0.00,
+                "max_avail": 24195168456,
+                "objects": 0
+            }
+        },
+        {
+            "name": "volumes",
+            "id": 3,
+            "stats": {
+                "kb_used": 0,
+                "bytes_used": 0,
+                "percent_used": 0.00,
+                "max_avail": 24195168789,
+                "objects": 0
+            }
+        }
+    ]
+}
+"""
+
+
 class FakeException(Exception):
     pass
@@ -557,3 +605,19 @@ class RbdTestCase(test.NoDBTestCase):
         proxy.list_snaps.return_value = [{'name': self.snap_name}, ]
         self.driver.rollback_to_snap(self.volume_name, self.snap_name)
         proxy.rollback_to_snap.assert_called_once_with(self.snap_name)
+
+    @mock.patch('oslo_concurrency.processutils.execute')
+    def test_get_pool_info(self, mock_execute):
+        mock_execute.return_value = (CEPH_DF, '')
+        ceph_df_json = jsonutils.loads(CEPH_DF)
+        expected = {'total': ceph_df_json['stats']['total_bytes'],
+                    'free': ceph_df_json['pools'][1]['stats']['max_avail'],
+                    'used': ceph_df_json['pools'][1]['stats']['bytes_used']}
+        self.assertDictEqual(expected, self.driver.get_pool_info())
+
+    @mock.patch('oslo_concurrency.processutils.execute')
+    def test_get_pool_info_not_found(self, mock_execute):
+        # Make the pool something other than self.rbd_pool so it won't be found
+        ceph_df_not_found = CEPH_DF.replace('rbd', 'vms')
+        mock_execute.return_value = (ceph_df_not_found, '')
+        self.assertRaises(exception.NotFound, self.driver.get_pool_info)
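
For orientation, pools[1] in the CEPH_DF sample above is the 'rbd' pool
(the pool the test fixture is configured for), so the expected dict
resolves to:

    expected = {'total': 25757220864,  # stats.total_bytes
                'free': 24195168456,   # pools[1].stats.max_avail ('rbd')
                'used': 0}             # pools[1].stats.bytes_used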


@@ -30,7 +30,6 @@ from oslo_serialization import jsonutils
 from oslo_service import loopingcall
 from oslo_utils import encodeutils
 from oslo_utils import excutils
-from oslo_utils import units

 from nova import exception
 from nova.i18n import _
@@ -366,11 +365,32 @@ class RBDDriver(object):
                 self._destroy_volume(client, volume)

     def get_pool_info(self):
-        with RADOSClient(self) as client:
-            stats = client.cluster.get_cluster_stats()
-            return {'total': stats['kb'] * units.Ki,
-                    'free': stats['kb_avail'] * units.Ki,
-                    'used': stats['kb_used'] * units.Ki}
+        # NOTE(melwitt): We're executing 'ceph df' here instead of calling
+        # the RADOSClient.get_cluster_stats python API because we need
+        # access to the MAX_AVAIL stat, which reports the available bytes
+        # taking replication into consideration. The global available stat
+        # from the RADOSClient.get_cluster_stats python API does not take
+        # replication size into consideration and will simply return the
+        # available storage per OSD, added together across all OSDs. The
+        # MAX_AVAIL stat will divide by the replication size when doing the
+        # calculation.
+        args = ['ceph', 'df', '--format=json'] + self.ceph_args()
+        out, _ = processutils.execute(*args)
+        stats = jsonutils.loads(out)
+
+        # Find the pool for which we are configured.
+        pool_stats = None
+        for pool in stats['pools']:
+            if pool['name'] == self.pool:
+                pool_stats = pool['stats']
+                break
+
+        if pool_stats is None:
+            raise exception.NotFound('Pool %s could not be found.' % self.pool)
+
+        return {'total': stats['stats']['total_bytes'],
+                'free': pool_stats['max_avail'],
+                'used': pool_stats['bytes_used']}

     def create_snap(self, volume, name, pool=None, protect=False):
         """Create a snapshot of an RBD volume.


@@ -0,0 +1,9 @@
+---
+other:
+  - |
+    The reporting for bytes available for RBD has been enhanced to
+    accommodate `unrecommended
+    <http://docs.ceph.com/docs/luminous/start/hardware-recommendations/#hard-disk-drives>`_
+    Ceph deployments where multiple OSDs are running on a single disk. The new
+    reporting method takes the number of configured replicas into
+    consideration when reporting bytes available.