From 79994d643c76a176550e8d3a11cadc51c767df59 Mon Sep 17 00:00:00 2001 From: Nikesh Mahalka Date: Tue, 2 Aug 2016 13:36:42 -0400 Subject: [PATCH] Concurrency issue in K2 iSCSI and FC Cinder drivers Kaminario K2 iSCSI and FC arrays do not support concurrent operations. To avoid this, use locks and retry mechanism on each K2 requests. Also, use locks to avoid race conditions between attach and detach volumes. Change-Id: Ia5c37ebf0333f962ee1a71f71337e6b9c133241b Closes-Bug: #1608913 Co-Authored-By: Lakshmi Narayana Co-Authored-By: Sreedhar Varma Co-Authored-By: Ido Benda --- cinder/exception.py | 4 ++ .../drivers/kaminario/kaminario_common.py | 68 +++++++++++++++---- .../volume/drivers/kaminario/kaminario_fc.py | 4 ++ .../drivers/kaminario/kaminario_iscsi.py | 8 +++ ...ario-concurrency-bug-e0b899a42383660c.yaml | 9 +++ 5 files changed, 81 insertions(+), 12 deletions(-) create mode 100644 releasenotes/notes/kaminario-concurrency-bug-e0b899a42383660c.yaml diff --git a/cinder/exception.py b/cinder/exception.py index dea0bfe03de..067c93296a7 100644 --- a/cinder/exception.py +++ b/cinder/exception.py @@ -1199,6 +1199,10 @@ class KaminarioCinderDriverException(VolumeDriverException): message = _("KaminarioCinderDriver failure: %(reason)s") +class KaminarioRetryableException(VolumeDriverException): + message = _("Kaminario retryable exception: %(reason)s") + + # Synology driver class SynoAPIHTTPError(CinderException): message = _("HTTP exit code: [%(code)s]") diff --git a/cinder/volume/drivers/kaminario/kaminario_common.py b/cinder/volume/drivers/kaminario/kaminario_common.py index 7d4ef60fc6b..686777b7712 100644 --- a/cinder/volume/drivers/kaminario/kaminario_common.py +++ b/cinder/volume/drivers/kaminario/kaminario_common.py @@ -16,7 +16,7 @@ import math import re -import six +import threading import eventlet from oslo_config import cfg @@ -24,6 +24,8 @@ from oslo_log import log as logging from oslo_utils import importutils from oslo_utils import units from oslo_utils import versionutils +import requests +import six import cinder from cinder import exception @@ -33,7 +35,11 @@ from cinder import utils from cinder.volume.drivers.san import san from cinder.volume import utils as vol_utils +krest = importutils.try_import("krest") + K2_MIN_VERSION = '2.2.0' +K2_LOCK_PREFIX = 'Kaminario' +MAX_K2_RETRY = 5 LOG = logging.getLogger(__name__) kaminario1_opts = [ @@ -55,6 +61,43 @@ kaminario2_opts = [ CONF = cfg.CONF CONF.register_opts(kaminario1_opts) +K2HTTPError = requests.exceptions.HTTPError +K2_RETRY_ERRORS = ("MC_ERR_BUSY", "MC_ERR_BUSY_SPECIFIC", + "MC_ERR_INPROGRESS", "MC_ERR_START_TIMEOUT") + +if krest: + class KrestWrap(krest.EndPoint): + def __init__(self, *args, **kwargs): + self.krestlock = threading.Lock() + super(KrestWrap, self).__init__(*args, **kwargs) + + def _should_retry(self, err_code, err_msg): + if err_code == 400: + for er in K2_RETRY_ERRORS: + if er in err_msg: + LOG.debug("Retry ERROR: %d with status %s", + err_code, err_msg) + return True + return False + + @utils.retry(exception.KaminarioRetryableException, + retries=MAX_K2_RETRY) + def _request(self, method, *args, **kwargs): + try: + LOG.debug("running through the _request wrapper...") + self.krestlock.acquire() + return super(KrestWrap, self)._request(method, + *args, **kwargs) + except K2HTTPError as err: + err_code = err.response.status_code + err_msg = err.response.text + if self._should_retry(err_code, err_msg): + raise exception.KaminarioRetryableException( + reason=six.text_type(err_msg)) + raise + finally: + self.krestlock.release() + def kaminario_logger(func): """Return a function wrapper. @@ -96,23 +139,25 @@ class KaminarioCinderDriver(cinder.volume.driver.ISCSIDriver): self.configuration.append_config_values(kaminario2_opts) self.replica = None self._protocol = None + k2_lock_sfx = self.configuration.safe_get('volume_backend_name') or '' + self.k2_lock_name = "%s-%s" % (K2_LOCK_PREFIX, k2_lock_sfx) def check_for_setup_error(self): - if self.krest is None: + if krest is None: msg = _("Unable to import 'krest' python module.") LOG.error(msg) raise exception.KaminarioCinderDriverException(reason=msg) else: conf = self.configuration - self.client = self.krest.EndPoint(conf.san_ip, - conf.san_login, - conf.san_password, - ssl_validate=False) + self.client = KrestWrap(conf.san_ip, + conf.san_login, + conf.san_password, + ssl_validate=False) if self.replica: - self.target = self.krest.EndPoint(self.replica.backend_id, - self.replica.login, - self.replica.password, - ssl_validate=False) + self.target = KrestWrap(self.replica.backend_id, + self.replica.login, + self.replica.password, + ssl_validate=False) v_rs = self.client.search("system/state") if hasattr(v_rs, 'hits') and v_rs.total != 0: ver = v_rs.hits[0].rest_api_version @@ -150,7 +195,6 @@ class KaminarioCinderDriver(cinder.volume.driver.ISCSIDriver): def do_setup(self, context): super(KaminarioCinderDriver, self).do_setup(context) self._check_ops() - self.krest = importutils.try_import("krest") @kaminario_logger def create_volume(self, volume): @@ -646,7 +690,7 @@ class KaminarioCinderDriver(cinder.volume.driver.ISCSIDriver): pass @kaminario_logger - def terminate_connection(self, volume, connector, **kwargs): + def terminate_connection(self, volume, connector): """Terminate connection of volume from host.""" # Get volume object if type(volume).__name__ != 'RestObject': diff --git a/cinder/volume/drivers/kaminario/kaminario_fc.py b/cinder/volume/drivers/kaminario/kaminario_fc.py index dcbf1f0ca7a..1f957094f45 100644 --- a/cinder/volume/drivers/kaminario/kaminario_fc.py +++ b/cinder/volume/drivers/kaminario/kaminario_fc.py @@ -17,6 +17,7 @@ import six from oslo_log import log as logging +from cinder import coordination from cinder import exception from cinder.i18n import _, _LE from cinder.objects import fields @@ -47,6 +48,7 @@ class KaminarioFCDriver(common.KaminarioCinderDriver): @fczm_utils.AddFCZone @kaminario_logger + @coordination.synchronized('{self.k2_lock_name}') def initialize_connection(self, volume, connector): """Attach K2 volume to host.""" # Check wwpns in host connector. @@ -69,6 +71,8 @@ class KaminarioFCDriver(common.KaminarioCinderDriver): "initiator_target_map": init_target_map}} @fczm_utils.RemoveFCZone + @kaminario_logger + @coordination.synchronized('{self.k2_lock_name}') def terminate_connection(self, volume, connector, **kwargs): super(KaminarioFCDriver, self).terminate_connection(volume, connector) properties = {"driver_volume_type": "fibre_channel", "data": {}} diff --git a/cinder/volume/drivers/kaminario/kaminario_iscsi.py b/cinder/volume/drivers/kaminario/kaminario_iscsi.py index 6c0df6cfca2..bdffb671d4e 100644 --- a/cinder/volume/drivers/kaminario/kaminario_iscsi.py +++ b/cinder/volume/drivers/kaminario/kaminario_iscsi.py @@ -17,6 +17,7 @@ import six from oslo_log import log as logging +from cinder import coordination from cinder import exception from cinder.i18n import _, _LE from cinder import interface @@ -47,6 +48,7 @@ class KaminarioISCSIDriver(common.KaminarioCinderDriver): self._protocol = 'iSCSI' @kaminario_logger + @coordination.synchronized('{self.k2_lock_name}') def initialize_connection(self, volume, connector): """Attach K2 volume to host.""" # Get target_portal and target iqn. @@ -60,6 +62,12 @@ class KaminarioISCSIDriver(common.KaminarioCinderDriver): "target_lun": lun, "target_discovered": True}} + @kaminario_logger + @coordination.synchronized('{self.k2_lock_name}') + def terminate_connection(self, volume, connector, **kwargs): + super(KaminarioISCSIDriver, self).terminate_connection(volume, + connector) + @kaminario_logger def get_target_info(self, volume): rep_status = fields.ReplicationStatus.FAILED_OVER diff --git a/releasenotes/notes/kaminario-concurrency-bug-e0b899a42383660c.yaml b/releasenotes/notes/kaminario-concurrency-bug-e0b899a42383660c.yaml new file mode 100644 index 00000000000..dd969b8113a --- /dev/null +++ b/releasenotes/notes/kaminario-concurrency-bug-e0b899a42383660c.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - Fixed concurrency issue in K2 iSCSI and FC Cinder drivers + due to possible race conditions between attach and detach + volumes and due to limitation from Kaminario K2 iSCSI and + FC arrays on concurrent operations. + To overcome array limitation, use locks and retry mechanism + on each K2 requests. To overcome race conditions, use locks + on initialize_connection and terminate_connection.