Remove locks from Pure volume drivers

If we switch to using a retry and are more careful about handling errors
when a host is deleted or volumes are attached we don’t need to use the
lock.

For initialize_connection time when we go to create a host we run the
risk that a terminate_connection running in parallel deleted our host
from underneath us before the volume was attached. To handle that we
just catch the error in that scenario and try to create the host again.

The second issue for initialize_connection is if we try to create a host
and another initialize_connection running in parallel beats us to it. If
the host already exists we can catch the exception for create_host(..)
and retry, which will give us the existing one.

For terminate_connection time we run the risk that between when we
determine a host to be ready for deletion and actually deleting it a
new volume is attached. We can just swallow this error since the Purity
management API will not allow to delete a host with connected volumes.

Change-Id: I8ae75dab279bb983a31595b6af6105a933d5c72c
Closes-Bug: #1588089
This commit is contained in:
Patrick East 2016-06-01 16:10:46 -07:00
parent cf19bc16b9
commit c4b6d51739
3 changed files with 155 additions and 27 deletions

View File

@ -792,6 +792,10 @@ class PureDriverException(VolumeDriverException):
message = _("Pure Storage Cinder driver failure: %(reason)s")
class PureRetryableException(VolumeBackendAPIException):
message = _("Retryable Pure Storage Exception encountered")
# SolidFire
class SolidFireAPIException(VolumeBackendAPIException):
message = _("Bad response from SolidFire API")

View File

@ -725,7 +725,9 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
self.driver.delete_volume(VOLUME)
expected = [mock.call.list_volume_private_connections(vol_name),
mock.call.disconnect_host(host_name_a, vol_name),
mock.call.list_host_connections(host_name_a, private=True),
mock.call.disconnect_host(host_name_b, vol_name),
mock.call.list_host_connections(host_name_b, private=True),
mock.call.destroy_volume(vol_name)]
self.array.assert_has_calls(expected)
@ -768,7 +770,7 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
# Branch with manually created host
self.driver.terminate_connection(VOLUME, ISCSI_CONNECTOR)
self.array.disconnect_host.assert_called_with("some-host", vol_name)
self.assertFalse(self.array.list_host_connections.called)
self.assertTrue(self.array.list_host_connections.called)
self.assertFalse(self.array.delete_host.called)
# Branch with host added to host group
self.array.reset_mock()
@ -822,21 +824,32 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
self.assertFalse(self.array.list_host_connections.called)
self.assertFalse(self.array.delete_host.called)
@mock.patch(BASE_DRIVER_OBJ + "._get_host", autospec=True)
def test_terminate_connection_host_deleted(self, mock_host):
def _test_terminate_connection_with_error(self, mock_host, error):
vol_name = VOLUME["name"] + "-cinder"
mock_host.return_value = PURE_HOST.copy()
self.array.reset_mock()
self.array.list_host_connections.return_value = []
self.array.delete_host.side_effect = \
self.purestorage_module.PureHTTPError(code=400,
text='Host does not exist.')
text=error)
self.driver.terminate_connection(VOLUME, ISCSI_CONNECTOR)
self.array.disconnect_host.assert_called_with(PURE_HOST_NAME, vol_name)
self.array.list_host_connections.assert_called_with(PURE_HOST_NAME,
private=True)
self.array.delete_host.assert_called_once_with(PURE_HOST_NAME)
@mock.patch(BASE_DRIVER_OBJ + "._get_host", autospec=True)
def test_terminate_connection_host_deleted(self, mock_host):
self._test_terminate_connection_with_error(mock_host,
'Host does not exist.')
@mock.patch(BASE_DRIVER_OBJ + "._get_host", autospec=True)
def test_terminate_connection_host_got_new_connections(self, mock_host):
self._test_terminate_connection_with_error(
mock_host,
'Host cannot be deleted due to existing connections.'
)
def test_extend_volume(self):
vol_name = VOLUME["name"] + "-cinder"
self.driver.extend_volume(VOLUME, 3)
@ -2155,6 +2168,51 @@ class PureISCSIDriverTestCase(PureDriverTestCase):
self.assertTrue(self.array.connect_host.called)
self.assertTrue(self.array.list_volume_private_connections)
@mock.patch(ISCSI_DRIVER_OBJ + "._get_chap_secret_from_init_data")
@mock.patch(ISCSI_DRIVER_OBJ + "._get_host", autospec=True)
def test_connect_host_deleted(self, mock_host, mock_get_secret):
mock_host.return_value = None
self.mock_config.use_chap_auth = True
mock_get_secret.return_value = 'abcdef'
self.array.set_host.side_effect = (
self.purestorage_module.PureHTTPError(
code=400, text='Host does not exist.'))
# Because we mocked out retry make sure we are raising the right
# exception to allow for retries to happen.
self.assertRaises(exception.PureRetryableException,
self.driver._connect,
VOLUME, ISCSI_CONNECTOR)
@mock.patch(ISCSI_DRIVER_OBJ + "._get_host", autospec=True)
def test_connect_iqn_already_in_use(self, mock_host):
mock_host.return_value = None
self.array.create_host.side_effect = (
self.purestorage_module.PureHTTPError(
code=400, text='The specified IQN is already in use.'))
# Because we mocked out retry make sure we are raising the right
# exception to allow for retries to happen.
self.assertRaises(exception.PureRetryableException,
self.driver._connect,
VOLUME, ISCSI_CONNECTOR)
@mock.patch(ISCSI_DRIVER_OBJ + "._get_host", autospec=True)
def test_connect_create_host_already_exists(self, mock_host):
mock_host.return_value = None
self.array.create_host.side_effect = (
self.purestorage_module.PureHTTPError(
code=400, text='Host already exists.'))
# Because we mocked out retry make sure we are raising the right
# exception to allow for retries to happen.
self.assertRaises(exception.PureRetryableException,
self.driver._connect,
VOLUME, ISCSI_CONNECTOR)
@mock.patch(ISCSI_DRIVER_OBJ + "._generate_chap_secret")
def test_get_chap_credentials_create_new(self, mock_generate_secret):
self.mock_utils.get_driver_initiator_data.return_value = []
@ -2302,6 +2360,20 @@ class PureFCDriverTestCase(PureDriverTestCase):
self.assertTrue(self.array.connect_host.called)
self.assertTrue(self.array.list_volume_private_connections)
@mock.patch(FC_DRIVER_OBJ + "._get_host", autospec=True)
def test_connect_wwn_already_in_use(self, mock_host):
mock_host.return_value = None
self.array.create_host.side_effect = (
self.purestorage_module.PureHTTPError(
code=400, text='The specified WWN is already in use.'))
# Because we mocked out retry make sure we are raising the right
# exception to allow for retries to happen.
self.assertRaises(exception.PureRetryableException,
self.driver._connect,
VOLUME, FC_CONNECTOR)
@ddt.ddt
class PureVolumeUpdateStatsTestCase(PureBaseSharedDriverTestCase):

View File

@ -90,6 +90,7 @@ REPLICATION_CG_NAME = "cinder-group"
CHAP_SECRET_KEY = "PURE_TARGET_CHAP_SECRET"
ERR_MSG_NOT_EXIST = "does not exist"
ERR_MSG_HOST_NOT_EXIST = "Host " + ERR_MSG_NOT_EXIST
ERR_MSG_NO_SUCH_SNAPSHOT = "No such volume or snapshot"
ERR_MSG_PENDING_ERADICATION = "has been destroyed"
ERR_MSG_ALREADY_EXISTS = "already exists"
@ -98,11 +99,11 @@ ERR_MSG_ALREADY_INCLUDES = "already includes"
ERR_MSG_ALREADY_ALLOWED = "already allowed on"
ERR_MSG_NOT_CONNECTED = "is not connected"
ERR_MSG_ALREADY_BELONGS = "already belongs to"
ERR_MSG_EXISTING_CONNECTIONS = "cannot be deleted due to existing connections"
ERR_MSG_ALREADY_IN_USE = "already in use"
EXTRA_SPECS_REPL_ENABLED = "replication_enabled"
CONNECT_LOCK_NAME = 'PureVolumeDriver_connect'
UNMANAGED_SUFFIX = '-unmanaged'
MANAGE_SNAP_REQUIRED_API_VERSIONS = ['1.4', '1.5']
REPLICATION_REQUIRED_API_VERSIONS = ['1.3', '1.4', '1.5']
@ -110,6 +111,8 @@ REPLICATION_REQUIRED_API_VERSIONS = ['1.3', '1.4', '1.5']
REPL_SETTINGS_PROPAGATE_RETRY_INTERVAL = 5 # 5 seconds
REPL_SETTINGS_PROPAGATE_MAX_RETRIES = 36 # 36 * 5 = 180 seconds
HOST_CREATE_MAX_RETRIES = 5
USER_AGENT_BASE = 'OpenStack Cinder'
@ -430,7 +433,6 @@ class PureBaseVolumeDriver(san.SanDriver):
"""
raise NotImplementedError
@utils.synchronized(CONNECT_LOCK_NAME, external=True)
def _disconnect(self, array, volume, connector, **kwargs):
vol_name = self._get_vol_name(volume)
host = self._get_host(array, connector)
@ -453,7 +455,7 @@ class PureBaseVolumeDriver(san.SanDriver):
@pure_driver_debug_trace
def _disconnect_host(self, array, host_name, vol_name):
"""Return value indicates if host was deleted on array or not"""
"""Return value indicates if host should be cleaned up."""
try:
array.disconnect_host(host_name, vol_name)
except purestorage.PureHTTPError as err:
@ -463,23 +465,42 @@ class PureBaseVolumeDriver(san.SanDriver):
ctxt.reraise = False
LOG.error(_LE("Disconnection failed with message: "
"%(msg)s."), {"msg": err.text})
if (GENERATED_NAME.match(host_name) and
not array.list_host_connections(host_name, private=True)):
LOG.info(_LI("Deleting unneeded host %(host_name)r."),
connections = None
try:
connections = array.list_host_connections(host_name, private=True)
except purestorage.PureHTTPError as err:
with excutils.save_and_reraise_exception() as ctxt:
if err.code == 400 and ERR_MSG_NOT_EXIST in err.text:
ctxt.reraise = False
# Assume still used if volumes are attached
host_still_used = bool(connections)
if GENERATED_NAME.match(host_name) and not host_still_used:
LOG.info(_LI("Attempting to delete unneeded host %(host_name)r."),
{"host_name": host_name})
try:
array.delete_host(host_name)
host_still_used = False
except purestorage.PureHTTPError as err:
with excutils.save_and_reraise_exception() as ctxt:
if err.code == 400 and ERR_MSG_NOT_EXIST in err.text:
# Happens if the host is already deleted.
# This is fine though, just treat it as a warning.
ctxt.reraise = False
LOG.warning(_LW("Purity host deletion failed: "
"%(msg)s."), {"msg": err.text})
return True
return False
if err.code == 400:
if ERR_MSG_NOT_EXIST in err.text:
# Happens if the host is already deleted.
# This is fine though, just log so we know what
# happened.
ctxt.reraise = False
host_still_used = False
LOG.debug("Purity host deletion failed: "
"%(msg)s.", {"msg": err.text})
if ERR_MSG_EXISTING_CONNECTIONS in err.text:
# If someone added a connection underneath us
# that's ok, just keep going.
ctxt.reraise = False
host_still_used = True
LOG.debug("Purity host deletion ignored: %(msg)s",
{"msg": err.text})
return not host_still_used
@pure_driver_debug_trace
def get_volume_stats(self, refresh=False):
@ -1052,6 +1073,9 @@ class PureBaseVolumeDriver(san.SanDriver):
try:
connection = array.connect_host(host_name, vol_name)
except purestorage.PureHTTPError as err:
if err.code == 400 and ERR_MSG_HOST_NOT_EXIST in err.text:
LOG.debug('Unable to attach volume to host: %s', err.text)
raise exception.PureRetryableException()
with excutils.save_and_reraise_exception() as ctxt:
if (err.code == 400 and
ERR_MSG_ALREADY_EXISTS in err.text):
@ -1575,7 +1599,8 @@ class PureISCSIDriver(PureBaseVolumeDriver, san.SanISCSIDriver):
return username, password
@utils.synchronized(CONNECT_LOCK_NAME, external=True)
@utils.retry(exception.PureRetryableException,
retries=HOST_CREATE_MAX_RETRIES)
def _connect(self, volume, connector):
"""Connect the host and volume; return dict describing connection."""
iqn = connector["initiator"]
@ -1615,12 +1640,29 @@ class PureISCSIDriver(PureBaseVolumeDriver, san.SanISCSIDriver):
host_name = self._generate_purity_host_name(connector["host"])
LOG.info(_LI("Creating host object %(host_name)r with IQN:"
" %(iqn)s."), {"host_name": host_name, "iqn": iqn})
current_array.create_host(host_name, iqnlist=[iqn])
try:
current_array.create_host(host_name, iqnlist=[iqn])
except purestorage.PureHTTPError as err:
if (err.code == 400 and
(ERR_MSG_ALREADY_EXISTS in err.text or
ERR_MSG_ALREADY_IN_USE in err.text)):
# If someone created it before we could just retry, we will
# pick up the new host.
LOG.debug('Unable to create host: %s', err.text)
raise exception.PureRetryableException()
if self.configuration.use_chap_auth:
current_array.set_host(host_name,
host_user=chap_username,
host_password=chap_password)
try:
current_array.set_host(host_name,
host_user=chap_username,
host_password=chap_password)
except purestorage.PureHTTPError as err:
if (err.code == 400 and
ERR_MSG_HOST_NOT_EXIST in err.text):
# If the host disappeared out from under us that's ok,
# we will just retry and snag a new host.
LOG.debug('Unable to set CHAP info: %s', err.text)
raise exception.PureRetryableException()
connection = self._connect_host_to_vol(current_array,
host_name,
@ -1679,7 +1721,8 @@ class PureFCDriver(PureBaseVolumeDriver, driver.FibreChannelDriver):
return properties
@utils.synchronized(CONNECT_LOCK_NAME, external=True)
@utils.retry(exception.PureRetryableException,
retries=HOST_CREATE_MAX_RETRIES)
def _connect(self, volume, connector):
"""Connect the host and volume; return dict describing connection."""
wwns = connector["wwpns"]
@ -1696,7 +1739,16 @@ class PureFCDriver(PureBaseVolumeDriver, driver.FibreChannelDriver):
host_name = self._generate_purity_host_name(connector["host"])
LOG.info(_LI("Creating host object %(host_name)r with WWN:"
" %(wwn)s."), {"host_name": host_name, "wwn": wwns})
current_array.create_host(host_name, wwnlist=wwns)
try:
current_array.create_host(host_name, wwnlist=wwns)
except purestorage.PureHTTPError as err:
if (err.code == 400 and
(ERR_MSG_ALREADY_EXISTS in err.text or
ERR_MSG_ALREADY_IN_USE in err.text)):
# If someone created it before we could just retry, we will
# pick up the new host.
LOG.debug('Unable to create host: %s', err.text)
raise exception.PureRetryableException()
return self._connect_host_to_vol(current_array, host_name, vol_name)