Prevent glance-api hangups during connection to rbd
This code adds new config option for rados connect timeout, which tells how much time glance-api has to wait before close the connection. Setting 'rados_connect_timeout' <= 0 means no timeout. DocImpact: new config option 'rados_connect_timout' (default=0) Change-Id: Ib44f74063d5a8332c6b5f3e15bcfa044c86bef9b Closes-Bug: #1469246
This commit is contained in:
parent
cd04b9217f
commit
3f5a37cb6e
|
@ -15,9 +15,7 @@
|
|||
|
||||
"""Storage backend for RBD
|
||||
(RADOS (Reliable Autonomic Distributed Object Store) Block Device)"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import with_statement
|
||||
|
||||
import contextlib
|
||||
import hashlib
|
||||
import logging
|
||||
import math
|
||||
|
@ -69,6 +67,10 @@ _RBD_OPTS = [
|
|||
'If using cephx authentication, this file should '
|
||||
'include a reference to the right keyring '
|
||||
'in a client.<USER> section')),
|
||||
cfg.IntOpt('rados_connect_timeout', default=0,
|
||||
help=_('Timeout value (in seconds) used when connecting to '
|
||||
'ceph cluster. If value <= 0, no timeout is set and '
|
||||
'default librados value is used.'))
|
||||
]
|
||||
|
||||
|
||||
|
@ -154,11 +156,12 @@ class ImageIterator(object):
|
|||
self.user = store.user
|
||||
self.conf_file = store.conf_file
|
||||
self.chunk_size = chunk_size or store.READ_CHUNKSIZE
|
||||
self.store = store
|
||||
|
||||
def __iter__(self):
|
||||
try:
|
||||
with rados.Rados(conffile=self.conf_file,
|
||||
rados_id=self.user) as conn:
|
||||
with self.store.get_connection(conffile=self.conf_file,
|
||||
rados_id=self.user) as conn:
|
||||
with conn.open_ioctx(self.pool) as ioctx:
|
||||
with rbd.Image(ioctx, self.name,
|
||||
snapshot=self.snapshot) as image:
|
||||
|
@ -187,6 +190,21 @@ class Store(driver.Store):
|
|||
def get_schemes(self):
|
||||
return ('rbd',)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def get_connection(self, conffile, rados_id):
|
||||
client = rados.Rados(conffile=conffile, rados_id=rados_id)
|
||||
|
||||
try:
|
||||
client.connect(timeout=self.connect_timeout)
|
||||
except rados.Error:
|
||||
msg = _LE("Error connecting to ceph cluster.")
|
||||
LOG.exception(msg)
|
||||
raise exceptions.BackendException()
|
||||
try:
|
||||
yield client
|
||||
finally:
|
||||
client.shutdown()
|
||||
|
||||
def configure_add(self):
|
||||
"""
|
||||
Configure the Store to use the stored configuration options
|
||||
|
@ -205,6 +223,7 @@ class Store(driver.Store):
|
|||
self.pool = str(self.conf.glance_store.rbd_store_pool)
|
||||
self.user = str(self.conf.glance_store.rbd_store_user)
|
||||
self.conf_file = str(self.conf.glance_store.rbd_store_ceph_conf)
|
||||
self.connect_timeout = self.conf.glance_store.rados_connect_timeout
|
||||
except cfg.ConfigFileValueError as e:
|
||||
reason = _("Error in store configuration: %s") % e
|
||||
LOG.error(reason)
|
||||
|
@ -239,8 +258,8 @@ class Store(driver.Store):
|
|||
# if there is a pool specific in the location, use it; otherwise
|
||||
# we fall back to the default pool specified in the config
|
||||
target_pool = loc.pool or self.pool
|
||||
with rados.Rados(conffile=self.conf_file,
|
||||
rados_id=self.user) as conn:
|
||||
with self.get_connection(conffile=self.conf_file,
|
||||
rados_id=self.user) as conn:
|
||||
with conn.open_ioctx(target_pool) as ioctx:
|
||||
try:
|
||||
with rbd.Image(ioctx, loc.image,
|
||||
|
@ -287,7 +306,8 @@ class Store(driver.Store):
|
|||
:raises NotFound if image does not exist;
|
||||
InUseByStore if image is in use or snapshot unprotect failed
|
||||
"""
|
||||
with rados.Rados(conffile=self.conf_file, rados_id=self.user) as conn:
|
||||
with self.get_connection(conffile=self.conf_file,
|
||||
rados_id=self.user) as conn:
|
||||
with conn.open_ioctx(target_pool) as ioctx:
|
||||
try:
|
||||
# First remove snapshot.
|
||||
|
@ -334,7 +354,8 @@ class Store(driver.Store):
|
|||
"""
|
||||
checksum = hashlib.md5()
|
||||
image_name = str(image_id)
|
||||
with rados.Rados(conffile=self.conf_file, rados_id=self.user) as conn:
|
||||
with self.get_connection(conffile=self.conf_file,
|
||||
rados_id=self.user) as conn:
|
||||
fsid = None
|
||||
if hasattr(conn, 'get_fsid'):
|
||||
fsid = conn.get_fsid()
|
||||
|
|
|
@ -76,6 +76,7 @@ class OptsTestCase(base.StoreBaseTest):
|
|||
'rbd_store_chunk_size',
|
||||
'rbd_store_pool',
|
||||
'rbd_store_user',
|
||||
'rados_connect_timeout',
|
||||
's3_store_access_key',
|
||||
's3_store_bucket',
|
||||
's3_store_bucket_url_format',
|
||||
|
|
|
@ -26,6 +26,9 @@ from glance_store.tests.unit import test_store_capabilities
|
|||
|
||||
class MockRados(object):
|
||||
|
||||
class Error(Exception):
|
||||
pass
|
||||
|
||||
class ioctx(object):
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
|
@ -299,6 +302,23 @@ class TestStore(base.StoreBaseTest,
|
|||
self.assertRaises(exceptions.StoreRandomGetNotSupported,
|
||||
self.store.get, loc, chunk_size=1)
|
||||
|
||||
@mock.patch.object(MockRados.Rados, 'connect')
|
||||
def test_rados_connect_timeout(self, mock_rados_connect):
|
||||
socket_timeout = 1.5
|
||||
self.config(rados_connect_timeout=socket_timeout)
|
||||
self.store.configure()
|
||||
with self.store.get_connection('conffile', 'rados_id'):
|
||||
mock_rados_connect.assert_called_with(timeout=socket_timeout)
|
||||
|
||||
@mock.patch.object(MockRados.Rados, 'connect', side_effect=MockRados.Error)
|
||||
def test_rados_connect_error(self, _):
|
||||
rbd_store.rados.Error = MockRados.Error
|
||||
|
||||
def test():
|
||||
with self.store.get_connection('conffile', 'rados_id'):
|
||||
pass
|
||||
self.assertRaises(exceptions.BackendException, test)
|
||||
|
||||
def test_create_image_conf_features(self):
|
||||
# Tests that we use non-0 features from ceph.conf and cast to int.
|
||||
fsid = 'fake'
|
||||
|
|
Loading…
Reference in New Issue