From 3f5a37cb6e27946da3a5943289d5996fa96a552a Mon Sep 17 00:00:00 2001 From: Mike Fedosin Date: Fri, 10 Jul 2015 16:53:14 +0300 Subject: [PATCH] Prevent glance-api hangups during connection to rbd This code adds new config option for rados connect timeout, which tells how much time glance-api has to wait before close the connection. Setting 'rados_connect_timeout' <= 0 means no timeout. DocImpact: new config option 'rados_connect_timout' (default=0) Change-Id: Ib44f74063d5a8332c6b5f3e15bcfa044c86bef9b Closes-Bug: #1469246 --- glance_store/_drivers/rbd.py | 39 +++++++++++++++++------ glance_store/tests/unit/test_opts.py | 1 + glance_store/tests/unit/test_rbd_store.py | 20 ++++++++++++ 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/glance_store/_drivers/rbd.py b/glance_store/_drivers/rbd.py index e48cf47b..e5345c33 100644 --- a/glance_store/_drivers/rbd.py +++ b/glance_store/_drivers/rbd.py @@ -15,9 +15,7 @@ """Storage backend for RBD (RADOS (Reliable Autonomic Distributed Object Store) Block Device)""" -from __future__ import absolute_import -from __future__ import with_statement - +import contextlib import hashlib import logging import math @@ -69,6 +67,10 @@ _RBD_OPTS = [ 'If using cephx authentication, this file should ' 'include a reference to the right keyring ' 'in a client. section')), + cfg.IntOpt('rados_connect_timeout', default=0, + help=_('Timeout value (in seconds) used when connecting to ' + 'ceph cluster. If value <= 0, no timeout is set and ' + 'default librados value is used.')) ] @@ -154,11 +156,12 @@ class ImageIterator(object): self.user = store.user self.conf_file = store.conf_file self.chunk_size = chunk_size or store.READ_CHUNKSIZE + self.store = store def __iter__(self): try: - with rados.Rados(conffile=self.conf_file, - rados_id=self.user) as conn: + with self.store.get_connection(conffile=self.conf_file, + rados_id=self.user) as conn: with conn.open_ioctx(self.pool) as ioctx: with rbd.Image(ioctx, self.name, snapshot=self.snapshot) as image: @@ -187,6 +190,21 @@ class Store(driver.Store): def get_schemes(self): return ('rbd',) + @contextlib.contextmanager + def get_connection(self, conffile, rados_id): + client = rados.Rados(conffile=conffile, rados_id=rados_id) + + try: + client.connect(timeout=self.connect_timeout) + except rados.Error: + msg = _LE("Error connecting to ceph cluster.") + LOG.exception(msg) + raise exceptions.BackendException() + try: + yield client + finally: + client.shutdown() + def configure_add(self): """ Configure the Store to use the stored configuration options @@ -205,6 +223,7 @@ class Store(driver.Store): self.pool = str(self.conf.glance_store.rbd_store_pool) self.user = str(self.conf.glance_store.rbd_store_user) self.conf_file = str(self.conf.glance_store.rbd_store_ceph_conf) + self.connect_timeout = self.conf.glance_store.rados_connect_timeout except cfg.ConfigFileValueError as e: reason = _("Error in store configuration: %s") % e LOG.error(reason) @@ -239,8 +258,8 @@ class Store(driver.Store): # if there is a pool specific in the location, use it; otherwise # we fall back to the default pool specified in the config target_pool = loc.pool or self.pool - with rados.Rados(conffile=self.conf_file, - rados_id=self.user) as conn: + with self.get_connection(conffile=self.conf_file, + rados_id=self.user) as conn: with conn.open_ioctx(target_pool) as ioctx: try: with rbd.Image(ioctx, loc.image, @@ -287,7 +306,8 @@ class Store(driver.Store): :raises NotFound if image does not exist; InUseByStore if image is in use or snapshot unprotect failed """ - with rados.Rados(conffile=self.conf_file, rados_id=self.user) as conn: + with self.get_connection(conffile=self.conf_file, + rados_id=self.user) as conn: with conn.open_ioctx(target_pool) as ioctx: try: # First remove snapshot. @@ -334,7 +354,8 @@ class Store(driver.Store): """ checksum = hashlib.md5() image_name = str(image_id) - with rados.Rados(conffile=self.conf_file, rados_id=self.user) as conn: + with self.get_connection(conffile=self.conf_file, + rados_id=self.user) as conn: fsid = None if hasattr(conn, 'get_fsid'): fsid = conn.get_fsid() diff --git a/glance_store/tests/unit/test_opts.py b/glance_store/tests/unit/test_opts.py index 5559dec1..63520400 100644 --- a/glance_store/tests/unit/test_opts.py +++ b/glance_store/tests/unit/test_opts.py @@ -76,6 +76,7 @@ class OptsTestCase(base.StoreBaseTest): 'rbd_store_chunk_size', 'rbd_store_pool', 'rbd_store_user', + 'rados_connect_timeout', 's3_store_access_key', 's3_store_bucket', 's3_store_bucket_url_format', diff --git a/glance_store/tests/unit/test_rbd_store.py b/glance_store/tests/unit/test_rbd_store.py index 9c179394..93cd0f70 100644 --- a/glance_store/tests/unit/test_rbd_store.py +++ b/glance_store/tests/unit/test_rbd_store.py @@ -26,6 +26,9 @@ from glance_store.tests.unit import test_store_capabilities class MockRados(object): + class Error(Exception): + pass + class ioctx(object): def __init__(self, *args, **kwargs): pass @@ -299,6 +302,23 @@ class TestStore(base.StoreBaseTest, self.assertRaises(exceptions.StoreRandomGetNotSupported, self.store.get, loc, chunk_size=1) + @mock.patch.object(MockRados.Rados, 'connect') + def test_rados_connect_timeout(self, mock_rados_connect): + socket_timeout = 1.5 + self.config(rados_connect_timeout=socket_timeout) + self.store.configure() + with self.store.get_connection('conffile', 'rados_id'): + mock_rados_connect.assert_called_with(timeout=socket_timeout) + + @mock.patch.object(MockRados.Rados, 'connect', side_effect=MockRados.Error) + def test_rados_connect_error(self, _): + rbd_store.rados.Error = MockRados.Error + + def test(): + with self.store.get_connection('conffile', 'rados_id'): + pass + self.assertRaises(exceptions.BackendException, test) + def test_create_image_conf_features(self): # Tests that we use non-0 features from ceph.conf and cast to int. fsid = 'fake'