Merge "Enable fail back in SolidFire driver"

This commit is contained in:
Zuul 2017-12-23 16:26:48 +00:00 committed by Gerrit Code Review
commit 7ac6ce4469
3 changed files with 155 additions and 48 deletions

View File

@ -1104,7 +1104,7 @@ class SolidFireVolumeTestCase(test.TestCase):
self.assertEqual('1.1.1.1:3260 0', v['provider_location']) self.assertEqual('1.1.1.1:3260 0', v['provider_location'])
configured_svip = '9.9.9.9:6500' configured_svip = '9.9.9.9:6500'
sfv.active_cluster_info['svip'] = configured_svip sfv.active_cluster['svip'] = configured_svip
v = sfv._get_model_info(sfaccount, 1) v = sfv._get_model_info(sfaccount, 1)
self.assertEqual('%s 0' % configured_svip, v['provider_location']) self.assertEqual('%s 0' % configured_svip, v['provider_location'])
@ -1969,7 +1969,7 @@ class SolidFireVolumeTestCase(test.TestCase):
'fake-mvip'}] 'fake-mvip'}]
ctxt = None ctxt = None
type_id = '290edb2a-f5ea-11e5-9ce9-5e5517507c66' type_id = '290edb2a-f5ea-11e5-9ce9-5e5517507c66'
fake_type = {'extra_specs': {'replication': 'enabled'}} fake_type = {'extra_specs': {'replication_enabled': '<is> True'}}
with mock.patch.object(volume_types, with mock.patch.object(volume_types,
'get_volume_type', 'get_volume_type',
return_value=fake_type): return_value=fake_type):

View File

@ -166,10 +166,11 @@ class SolidFireDriver(san.SanISCSIDriver):
2.0.8 - Add active status filter to get volume ops 2.0.8 - Add active status filter to get volume ops
2.0.9 - Always purge on delete volume 2.0.9 - Always purge on delete volume
2.0.10 - Add response to debug on retryable errors 2.0.10 - Add response to debug on retryable errors
2.0.11 - Add ability to failback replicating volumes
""" """
VERSION = '2.0.10' VERSION = '2.0.11'
# ThirdPartySystems wiki page # ThirdPartySystems wiki page
CI_WIKI_NAME = "NetApp_SolidFire_CI" CI_WIKI_NAME = "NetApp_SolidFire_CI"
@ -210,7 +211,7 @@ class SolidFireDriver(san.SanISCSIDriver):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(SolidFireDriver, self).__init__(*args, **kwargs) super(SolidFireDriver, self).__init__(*args, **kwargs)
self.failed_over_id = kwargs.get('active_backend_id', None) self.failed_over_id = kwargs.get('active_backend_id', None)
self.active_cluster_info = {} self.replication_status = kwargs.get('replication_status', "na")
self.configuration.append_config_values(sf_opts) self.configuration.append_config_values(sf_opts)
self.template_account_id = None self.template_account_id = None
self.max_volumes_per_account = 1990 self.max_volumes_per_account = 1990
@ -220,17 +221,26 @@ class SolidFireDriver(san.SanISCSIDriver):
self.failed_over = False self.failed_over = False
self.target_driver = SolidFireISCSI(solidfire_driver=self, self.target_driver = SolidFireISCSI(solidfire_driver=self,
configuration=self.configuration) configuration=self.configuration)
self.default_cluster = self._create_cluster_reference()
self.active_cluster = self.default_cluster
# If we're failed over, we need to parse things out and set the active
# cluster appropriately
if self.failed_over_id: if self.failed_over_id:
self.failed_over = True
remote_info = self._get_remote_info_by_id(self.failed_over_id) remote_info = self._get_remote_info_by_id(self.failed_over_id)
if remote_info: if remote_info:
self._set_active_cluster_info(remote_info['endpoint']) self.active_cluster = self._create_cluster_reference(
remote_info['endpoint'])
else: else:
LOG.error('Failed to initialize SolidFire driver to ' LOG.error('Failed to initialize SolidFire driver to '
'a remote cluster specified at id: %s', 'a remote cluster specified at id: %s',
self.failed_over_id) self.failed_over_id)
else:
self._set_active_cluster_info()
# NOTE(jdg): This works even in a failed over state, because what we
# do is use self.active_cluster in issue_api_request so by default we
# always use the currently active cluster, override that by providing
# an endpoint to issue_api_request if needed
try: try:
self._update_cluster_status() self._update_cluster_status()
except exception.SolidFireAPIException: except exception.SolidFireAPIException:
@ -240,8 +250,7 @@ class SolidFireDriver(san.SanISCSIDriver):
account = self.configuration.sf_template_account_name account = self.configuration.sf_template_account_name
self.template_account_id = self._create_template_account(account) self.template_account_id = self._create_template_account(account)
if not self.failed_over_id: self._set_cluster_pairs()
self._set_cluster_pairs()
def locked_image_id_operation(f, external=False): def locked_image_id_operation(f, external=False):
def lvo_inner1(inst, *args, **kwargs): def lvo_inner1(inst, *args, **kwargs):
@ -348,7 +357,8 @@ class SolidFireDriver(san.SanISCSIDriver):
remote_info['clusterPairID'] = ep['clusterPairID'] remote_info['clusterPairID'] = ep['clusterPairID']
break break
if not remote_pair: if (not remote_pair and
remote_info['mvip'] != self.active_cluster['mvip']):
# NOTE(jdg): create_remote_pairing sets the # NOTE(jdg): create_remote_pairing sets the
# clusterPairID in remote_info for us # clusterPairID in remote_info for us
self._create_remote_pairing(remote_info) self._create_remote_pairing(remote_info)
@ -356,23 +366,51 @@ class SolidFireDriver(san.SanISCSIDriver):
LOG.debug("Setting replication_enabled to True.") LOG.debug("Setting replication_enabled to True.")
self.replication_enabled = True self.replication_enabled = True
def _set_active_cluster_info(self, endpoint=None): def _create_cluster_reference(self, endpoint=None):
cluster_ref = {}
cluster_ref['endpoint'] = endpoint
if not endpoint: if not endpoint:
self.active_cluster_info['endpoint'] = self._build_endpoint_info() cluster_ref['endpoint'] = self._build_endpoint_info()
cluster_info = (self._issue_api_request(
'GetClusterInfo', {}, endpoint=cluster_ref['endpoint'])
['result']['clusterInfo'])
for k, v in cluster_info.items():
cluster_ref[k] = v
# Add a couple extra things that are handy for us
cluster_ref['clusterAPIVersion'] = (
self._issue_api_request('GetClusterVersionInfo',
{}, endpoint=cluster_ref['endpoint'])
['result']['clusterAPIVersion'])
# FIXME(jdg): This is fine for the default/base cluster, but
# if we have a secondary configured, and are using vlans etc
# we don't use what's in the config (that's the primary only),
# we need to set this from the replication_device config
if self.configuration.get('sf_svip', None):
cluster_ref['svip'] = (
self.configuration.get('sf_svip'))
return cluster_ref
def _set_active_cluster(self, endpoint=None):
if not endpoint:
self.active_cluster['endpoint'] = self._build_endpoint_info()
else: else:
self.active_cluster_info['endpoint'] = endpoint self.active_cluster['endpoint'] = endpoint
for k, v in self._issue_api_request( for k, v in self._issue_api_request(
'GetClusterInfo', 'GetClusterInfo',
{})['result']['clusterInfo'].items(): {})['result']['clusterInfo'].items():
self.active_cluster_info[k] = v self.active_cluster[k] = v
# Add a couple extra things that are handy for us # Add a couple extra things that are handy for us
self.active_cluster_info['clusterAPIVersion'] = ( self.active_cluster['clusterAPIVersion'] = (
self._issue_api_request('GetClusterVersionInfo', self._issue_api_request('GetClusterVersionInfo',
{})['result']['clusterAPIVersion']) {})['result']['clusterAPIVersion'])
if self.configuration.get('sf_svip', None): if self.configuration.get('sf_svip', None):
self.active_cluster_info['svip'] = ( self.active_cluster['svip'] = (
self.configuration.get('sf_svip')) self.configuration.get('sf_svip'))
def _create_provider_id_string(self, def _create_provider_id_string(self,
@ -383,7 +421,7 @@ class SolidFireDriver(san.SanISCSIDriver):
# swap that with the parent volume id # swap that with the parent volume id
return "%s %s %s" % (resource_id, return "%s %s %s" % (resource_id,
account_or_vol_id, account_or_vol_id,
self.active_cluster_info['uuid']) self.active_cluster['uuid'])
def _init_snapshot_mappings(self, srefs): def _init_snapshot_mappings(self, srefs):
updates = [] updates = []
@ -470,7 +508,7 @@ class SolidFireDriver(san.SanISCSIDriver):
if params is None: if params is None:
params = {} params = {}
if endpoint is None: if endpoint is None:
endpoint = self.active_cluster_info['endpoint'] endpoint = self.active_cluster['endpoint']
payload = {'method': method, 'params': params} payload = {'method': method, 'params': params}
url = '%s/json-rpc/%s/' % (endpoint['url'], version) url = '%s/json-rpc/%s/' % (endpoint['url'], version)
@ -577,7 +615,7 @@ class SolidFireDriver(san.SanISCSIDriver):
if endpoint: if endpoint:
iscsi_portal = endpoint['svip'] iscsi_portal = endpoint['svip']
else: else:
iscsi_portal = self.active_cluster_info['svip'] iscsi_portal = self.active_cluster['svip']
if ':' not in iscsi_portal: if ':' not in iscsi_portal:
iscsi_portal += ':3260' iscsi_portal += ':3260'
@ -1343,7 +1381,11 @@ class SolidFireDriver(san.SanISCSIDriver):
type_ref = volume_types.get_volume_type(ctxt, type_id) type_ref = volume_types.get_volume_type(ctxt, type_id)
specs = type_ref.get('extra_specs') specs = type_ref.get('extra_specs')
if specs.get('replication', 'disabled').lower() == 'enabled': # We use the replication_enabled flag for both the trigger in the
# driver, as well as capabilities for scheduler. Note we don't
# require or check for the additional "replication:True|False"
# spec in the type any longer.
if specs.get('replication_enabled', "") == "<is> True":
rep_opts['targets'] = specs.get( rep_opts['targets'] = specs.get(
'solidfire:replication_targets', self.cluster_pairs[0]) 'solidfire:replication_targets', self.cluster_pairs[0])
return rep_opts return rep_opts
@ -1824,7 +1866,7 @@ class SolidFireDriver(san.SanISCSIDriver):
data['replication_enabled'] = self.replication_enabled data['replication_enabled'] = self.replication_enabled
if self.replication_enabled: if self.replication_enabled:
data['replication'] = 'enabled' data['replication'] = 'enabled'
data['active_cluster_mvip'] = self.active_cluster_info['mvip'] data['active_cluster_mvip'] = self.active_cluster['mvip']
data['reserved_percentage'] = self.configuration.reserved_percentage data['reserved_percentage'] = self.configuration.reserved_percentage
data['QoS_support'] = True data['QoS_support'] = True
@ -2058,35 +2100,88 @@ class SolidFireDriver(san.SanISCSIDriver):
self._issue_api_request('ModifyVolume', self._issue_api_request('ModifyVolume',
params, version='5.0') params, version='5.0')
def _failover_volume(self, remote_vol, remote): def _failover_volume(self, src_vol, tgt_vol, tgt_cluster):
"""Modify remote volume to R/W mode.""" """Modify remote volume to R/W mode."""
self._issue_api_request( # Put the src in tgt mode assuming it's still available
'RemoveVolumePair', # catch the exception if the cluster isn't available and
{'volumeID': remote_vol['volumeID']}, # continue on
endpoint=remote['endpoint'], version='7.0') params = {'volumeID': src_vol['volumeID'],
'access': 'replicationTarget'}
try:
self._issue_api_request('ModifyVolume', params)
except exception.SolidFireAPIException:
# FIXME
pass
params = {'volumeID': remote_vol['volumeID'], # Now call out to the remote and make the tgt our new src
params = {'volumeID': tgt_vol['volumeID'],
'access': 'readWrite'} 'access': 'readWrite'}
self._issue_api_request('ModifyVolume', params, self._issue_api_request('ModifyVolume', params,
endpoint=remote['endpoint']) endpoint=tgt_cluster['endpoint'])
def failover_host(self, context, volumes, secondary_id=None, groups=None): def failover_host(self, context, volumes, secondary_id=None, groups=None):
"""Failover to replication target.""" """Failover to replication target.
In order to do failback, you MUST specify the original/default cluster
using secondary_id option. You can do this simply by specifying:
`secondary_id=default`
"""
failback = False
volume_updates = [] volume_updates = []
remote = None remote = None
secondary_id = secondary_id.lower() if secondary_id else None
# FIXME(jdg): There's an awful lot going on in this if/else block
# it's pretty simple in terms of what it does, but would be
# good to come back and clean it up and make it a bit more
# readable/maintainable.
# There are two cases we have to deal with
# 1. Caller specified a backend target to fail over to
# 2. Caller just wants to failover to anything available
# In case `1` we need to check if they specified the default
# and want to failback, so make sure we're even failed-over
#
# In case `2` they didn't specify a target, but if we're failed
# over already, can't just grab a target off the list, we might
# already be on that target, so check that and try and go back to
# whence you came
if secondary_id: if secondary_id:
for rc in self.cluster_pairs: if secondary_id == "default" and not self.failed_over:
if rc['mvip'] == secondary_id: LOG.error("SolidFire driver received failover_host "
remote = rc "specifying failback to default, the "
break "host however is not in `failed_over` "
"state, so can't failback.")
raise exception.InvalidReplicationTarget
elif secondary_id == "default" and self.failed_over:
remote = self.default_cluster
failback = True
# TODO(jdg): Add a simple check here to make
# sure the default is online
else:
for rc in self.cluster_pairs:
if rc['mvip'] == secondary_id:
remote = rc
break
if not remote: if not remote:
LOG.error("SolidFire driver received failover_host " LOG.error("SolidFire driver received failover_host "
"but was unable to find specified replication " "but was unable to find specified replication "
"pair with id: %s.", secondary_id) "pair with id: %s.", secondary_id)
raise exception.InvalidReplicationTarget raise exception.InvalidReplicationTarget
else: else:
remote = self.cluster_pairs[0] # Otherwise, we just grab a target off the list
# but beware, we may already be failed over and there
# may not be another target left, so recycle back to
# the default
if self.failed_over:
for cp in self.cluster_pairs:
if cp['endpoint'] != self.active_cluster['endpoint']:
remote = cp
if not remote:
remote = self.default_cluster
failback = True
else:
remote = self.cluster_pairs[0]
if not remote or not self.replication_enabled: if not remote or not self.replication_enabled:
LOG.error("SolidFire driver received failover_host " LOG.error("SolidFire driver received failover_host "
@ -2097,24 +2192,25 @@ class SolidFireDriver(san.SanISCSIDriver):
"on non replicated " "on non replicated "
"backend.")) "backend."))
remote_vols = self._map_sf_volumes(volumes, # Ok, that was annoying; get on with it
target_vols = self._map_sf_volumes(volumes,
endpoint=remote['endpoint']) endpoint=remote['endpoint'])
primary_vols = self._map_sf_volumes(volumes) primary_vols = self._map_sf_volumes(volumes)
for v in volumes: for v in volumes:
remote_vlist = [sfv for sfv in remote_vols target_vlist = [sfv for sfv in target_vols
if sfv['cinder_id'] == v['id']] if sfv['cinder_id'] == v['id']]
if len(remote_vlist) > 0: if len(target_vlist) > 0:
remote_vol = remote_vlist[0] target_vol = target_vlist[0]
self._failover_volume(remote_vol, remote) # BOOKMARK This fails on failback using 'default'
#
primary_vol = [sfv for sfv in primary_vols if primary_vol = [sfv for sfv in primary_vols if
sfv['cinder_id'] == v['id']][0] sfv['cinder_id'] == v['id']][0]
if len(primary_vol['volumePairs']) > 0: self._failover_volume(primary_vol, target_vol, remote)
self._issue_api_request(
'RemoveVolumePair', # Now we need to update the iqn of the volume to match
{'volumeID': primary_vol['volumeID']}, # the target svip etc
version='7.0') iqn = target_vol['iqn']
iqn = remote_vol['iqn']
volume_updates.append( volume_updates.append(
{'volume_id': v['id'], {'volume_id': v['id'],
'updates': { 'updates': {
@ -2131,10 +2227,14 @@ class SolidFireDriver(san.SanISCSIDriver):
# has been pretty much stateless and has allowed customers to run # has been pretty much stateless and has allowed customers to run
# active/active HA c-vol services with SolidFire. The introduction of # active/active HA c-vol services with SolidFire. The introduction of
# the active_cluster and failed_over attributes is going to break that # the active_cluster and failed_over attributes is going to break that
# but for now that's going to be the trade off of using replciation # but for now that's going to be the trade off of using replication
self.active_cluster_info = remote active_cluster_id = remote['mvip']
self.active_cluster = remote
self.failed_over = True self.failed_over = True
return remote['mvip'], volume_updates, [] if failback:
active_cluster_id = 'default'
return active_cluster_id, volume_updates, []
def freeze_backend(self, context): def freeze_backend(self, context):
"""Freeze backend notification.""" """Freeze backend notification."""

View File

@ -0,0 +1,7 @@
---
features:
- |
Add ability to call failover-host on a replication
enabled SF cluster a second time with host id = default
to initiate a failback to the default configured SolidFire
Cluster.