Prevent glance-api hangups during connection to rbd

This code adds new config option for rados connect timeout,
which tells how much time glance-api has to wait before
close the connection.

Setting 'rados_connect_timeout' <= 0  means no timeout.

DocImpact:
    new config option 'rados_connect_timout' (default=0)

Change-Id: Ib44f74063d5a8332c6b5f3e15bcfa044c86bef9b
Closes-Bug: #1469246
This commit is contained in:
Mike Fedosin 2015-07-10 16:53:14 +03:00
parent cd04b9217f
commit 3f5a37cb6e
3 changed files with 51 additions and 9 deletions

View File

@ -15,9 +15,7 @@
"""Storage backend for RBD
(RADOS (Reliable Autonomic Distributed Object Store) Block Device)"""
from __future__ import absolute_import
from __future__ import with_statement
import contextlib
import hashlib
import logging
import math
@ -69,6 +67,10 @@ _RBD_OPTS = [
'If using cephx authentication, this file should '
'include a reference to the right keyring '
'in a client.<USER> section')),
cfg.IntOpt('rados_connect_timeout', default=0,
help=_('Timeout value (in seconds) used when connecting to '
'ceph cluster. If value <= 0, no timeout is set and '
'default librados value is used.'))
]
@ -154,11 +156,12 @@ class ImageIterator(object):
self.user = store.user
self.conf_file = store.conf_file
self.chunk_size = chunk_size or store.READ_CHUNKSIZE
self.store = store
def __iter__(self):
try:
with rados.Rados(conffile=self.conf_file,
rados_id=self.user) as conn:
with self.store.get_connection(conffile=self.conf_file,
rados_id=self.user) as conn:
with conn.open_ioctx(self.pool) as ioctx:
with rbd.Image(ioctx, self.name,
snapshot=self.snapshot) as image:
@ -187,6 +190,21 @@ class Store(driver.Store):
def get_schemes(self):
return ('rbd',)
@contextlib.contextmanager
def get_connection(self, conffile, rados_id):
client = rados.Rados(conffile=conffile, rados_id=rados_id)
try:
client.connect(timeout=self.connect_timeout)
except rados.Error:
msg = _LE("Error connecting to ceph cluster.")
LOG.exception(msg)
raise exceptions.BackendException()
try:
yield client
finally:
client.shutdown()
def configure_add(self):
"""
Configure the Store to use the stored configuration options
@ -205,6 +223,7 @@ class Store(driver.Store):
self.pool = str(self.conf.glance_store.rbd_store_pool)
self.user = str(self.conf.glance_store.rbd_store_user)
self.conf_file = str(self.conf.glance_store.rbd_store_ceph_conf)
self.connect_timeout = self.conf.glance_store.rados_connect_timeout
except cfg.ConfigFileValueError as e:
reason = _("Error in store configuration: %s") % e
LOG.error(reason)
@ -239,8 +258,8 @@ class Store(driver.Store):
# if there is a pool specific in the location, use it; otherwise
# we fall back to the default pool specified in the config
target_pool = loc.pool or self.pool
with rados.Rados(conffile=self.conf_file,
rados_id=self.user) as conn:
with self.get_connection(conffile=self.conf_file,
rados_id=self.user) as conn:
with conn.open_ioctx(target_pool) as ioctx:
try:
with rbd.Image(ioctx, loc.image,
@ -287,7 +306,8 @@ class Store(driver.Store):
:raises NotFound if image does not exist;
InUseByStore if image is in use or snapshot unprotect failed
"""
with rados.Rados(conffile=self.conf_file, rados_id=self.user) as conn:
with self.get_connection(conffile=self.conf_file,
rados_id=self.user) as conn:
with conn.open_ioctx(target_pool) as ioctx:
try:
# First remove snapshot.
@ -334,7 +354,8 @@ class Store(driver.Store):
"""
checksum = hashlib.md5()
image_name = str(image_id)
with rados.Rados(conffile=self.conf_file, rados_id=self.user) as conn:
with self.get_connection(conffile=self.conf_file,
rados_id=self.user) as conn:
fsid = None
if hasattr(conn, 'get_fsid'):
fsid = conn.get_fsid()

View File

@ -76,6 +76,7 @@ class OptsTestCase(base.StoreBaseTest):
'rbd_store_chunk_size',
'rbd_store_pool',
'rbd_store_user',
'rados_connect_timeout',
's3_store_access_key',
's3_store_bucket',
's3_store_bucket_url_format',

View File

@ -26,6 +26,9 @@ from glance_store.tests.unit import test_store_capabilities
class MockRados(object):
class Error(Exception):
pass
class ioctx(object):
def __init__(self, *args, **kwargs):
pass
@ -299,6 +302,23 @@ class TestStore(base.StoreBaseTest,
self.assertRaises(exceptions.StoreRandomGetNotSupported,
self.store.get, loc, chunk_size=1)
@mock.patch.object(MockRados.Rados, 'connect')
def test_rados_connect_timeout(self, mock_rados_connect):
socket_timeout = 1.5
self.config(rados_connect_timeout=socket_timeout)
self.store.configure()
with self.store.get_connection('conffile', 'rados_id'):
mock_rados_connect.assert_called_with(timeout=socket_timeout)
@mock.patch.object(MockRados.Rados, 'connect', side_effect=MockRados.Error)
def test_rados_connect_error(self, _):
rbd_store.rados.Error = MockRados.Error
def test():
with self.store.get_connection('conffile', 'rados_id'):
pass
self.assertRaises(exceptions.BackendException, test)
def test_create_image_conf_features(self):
# Tests that we use non-0 features from ceph.conf and cast to int.
fsid = 'fake'