Fix RBD timeout

In the RBD driver the rados_connect_timeout option is not honored, and
the default 300 second timeout is applied instead.

This is a considerable problem if connectivity to the cluster is lost,
because the cinder volume service will get stuck trying to retrieve the
stats and will appear to the scheduler as being down.

The timeout is not honored because the connect method in the librados
library no longer accepts a timeout parameter, so the Python bindings
accept the timeout parameter but ignore it.

As can be read in the Python binding code [1]:
        # NOTE(sileht): timeout was supported by old python API, but
        # this is not something available in C API, so ignore for now
        # and remove it later

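This patch fixes this by setting the timeouts at the Rados class level:
the rados_osd_op_timeout, rados_mon_op_timeout and client_mount_timeout
options are set with conf_set before calling connect.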

[1]: https://github.com/ceph/ceph/blob/master/src/pybind/rados/rados.pyx#L793

Closes-Bug: #1621942
Change-Id: I3266e1331f5f4586b1e56ea78f6caaf8752d869f
This commit is contained in:
Gorka Eguileor 2016-09-09 19:50:40 +02:00
parent 91e322ad10
commit 0685b4f5e9
2 changed files with 15 additions and 7 deletions

View File

@ -1031,6 +1031,8 @@ class RBDTestCase(test.TestCase):
self.assertEqual(self.mock_rados.Rados.return_value.ioctx, ret[1])
self.mock_rados.Rados.return_value.open_ioctx.assert_called_with(
self.cfg.rbd_pool)
conf_set = self.mock_rados.Rados.return_value.conf_set
conf_set.assert_not_called()
# different pool
ret = self.driver._connect_to_rados('alt_pool')
@ -1044,8 +1046,10 @@ class RBDTestCase(test.TestCase):
self.cfg.rados_connect_timeout = 1
self.mock_rados.Rados.return_value.connect.reset_mock()
self.driver._connect_to_rados()
self.mock_rados.Rados.return_value.connect.assert_called_once_with(
timeout=1)
conf_set.assert_has_calls((mock.call('rados_osd_op_timeout', '1'),
mock.call('rados_mon_op_timeout', '1'),
mock.call('client_mount_timeout', '1')))
self.mock_rados.Rados.return_value.connect.assert_called_once_with()
# error
self.mock_rados.Rados.return_value.open_ioctx.reset_mock()

View File

@ -25,6 +25,7 @@ from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import fileutils
from oslo_utils import units
import six
from six.moves import urllib
from cinder import exception
@ -336,11 +337,14 @@ class RBDDriver(driver.TransferVD, driver.ExtendVD,
pool = self.configuration.rbd_pool
try:
if self.configuration.rados_connect_timeout >= 0:
client.connect(timeout=
self.configuration.rados_connect_timeout)
else:
client.connect()
timeout = self.configuration.rados_connect_timeout
if timeout >= 0:
timeout = six.text_type(timeout)
client.conf_set('rados_osd_op_timeout', timeout)
client.conf_set('rados_mon_op_timeout', timeout)
client.conf_set('client_mount_timeout', timeout)
client.connect()
ioctx = client.open_ioctx(pool)
return client, ioctx
except self.rados.Error: