Merge "Enable fail back in SolidFire driver"
commit 7ac6ce4469
@@ -1104,7 +1104,7 @@ class SolidFireVolumeTestCase(test.TestCase):
         self.assertEqual('1.1.1.1:3260 0', v['provider_location'])

         configured_svip = '9.9.9.9:6500'
-        sfv.active_cluster_info['svip'] = configured_svip
+        sfv.active_cluster['svip'] = configured_svip
         v = sfv._get_model_info(sfaccount, 1)
         self.assertEqual('%s 0' % configured_svip, v['provider_location'])

@@ -1969,7 +1969,7 @@ class SolidFireVolumeTestCase(test.TestCase):
             'fake-mvip'}]
         ctxt = None
         type_id = '290edb2a-f5ea-11e5-9ce9-5e5517507c66'
-        fake_type = {'extra_specs': {'replication': 'enabled'}}
+        fake_type = {'extra_specs': {'replication_enabled': '<is> True'}}
         with mock.patch.object(volume_types,
                                'get_volume_type',
                                return_value=fake_type):
@@ -166,10 +166,11 @@ class SolidFireDriver(san.SanISCSIDriver):
         2.0.8 - Add active status filter to get volume ops
         2.0.9 - Always purge on delete volume
         2.0.10 - Add response to debug on retryable errors
+        2.0.11 - Add ability to failback replicating volumes

     """

-    VERSION = '2.0.10'
+    VERSION = '2.0.11'

     # ThirdPartySystems wiki page
     CI_WIKI_NAME = "NetApp_SolidFire_CI"
@@ -210,7 +211,7 @@ class SolidFireDriver(san.SanISCSIDriver):
     def __init__(self, *args, **kwargs):
         super(SolidFireDriver, self).__init__(*args, **kwargs)
         self.failed_over_id = kwargs.get('active_backend_id', None)
-        self.active_cluster_info = {}
+        self.replication_status = kwargs.get('replication_status', "na")
         self.configuration.append_config_values(sf_opts)
         self.template_account_id = None
         self.max_volumes_per_account = 1990
@@ -220,17 +221,26 @@ class SolidFireDriver(san.SanISCSIDriver):
         self.failed_over = False
         self.target_driver = SolidFireISCSI(solidfire_driver=self,
                                            configuration=self.configuration)
+        self.default_cluster = self._create_cluster_reference()
+        self.active_cluster = self.default_cluster
+
+        # If we're failed over, we need to parse things out and set the active
+        # cluster appropriately
         if self.failed_over_id:
+            self.failed_over = True
             remote_info = self._get_remote_info_by_id(self.failed_over_id)
             if remote_info:
-                self._set_active_cluster_info(remote_info['endpoint'])
+                self.active_cluster = self._create_cluster_reference(
+                    remote_info['endpoint'])
             else:
                 LOG.error('Failed to initialize SolidFire driver to '
                           'a remote cluster specified at id: %s',
                           self.failed_over_id)
-        else:
-            self._set_active_cluster_info()

+        # NOTE(jdg): This works even in a failed over state, because what we
+        # do is use self.active_cluster in issue_api_request so by default we
+        # always use the currently active cluster, override that by providing
+        # an endpoint to issue_api_request if needed
         try:
             self._update_cluster_status()
         except exception.SolidFireAPIException:
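The constructor now derives everything from cluster references: build the default reference, assume it is active, then override when Cinder hands back an `active_backend_id` from a prior failover. A minimal, hypothetical sketch of that decision (the helper name and dict shapes are stand-ins, not the driver's actual code):

    # Hypothetical, simplified model of the constructor branch above.
    def pick_active_cluster(default_cluster, remote_infos, active_backend_id=None):
        """remote_infos maps a backend id to its cluster reference dict."""
        if not active_backend_id:
            return default_cluster, False       # normal startup
        remote = remote_infos.get(active_backend_id)
        if remote is None:
            raise ValueError('unknown replication target: %s' % active_backend_id)
        return remote, True                     # starting up in failed-over state

    # Fresh start vs. starting while failed over to 'cluster-b':
    default = {'mvip': '10.0.0.1'}
    targets = {'cluster-b': {'mvip': '10.0.0.2'}}
    assert pick_active_cluster(default, targets) == (default, False)
    assert pick_active_cluster(default, targets, 'cluster-b') == (targets['cluster-b'], True)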
@@ -240,8 +250,7 @@ class SolidFireDriver(san.SanISCSIDriver):
         account = self.configuration.sf_template_account_name
         self.template_account_id = self._create_template_account(account)

-        if not self.failed_over_id:
-            self._set_cluster_pairs()
+        self._set_cluster_pairs()

     def locked_image_id_operation(f, external=False):
         def lvo_inner1(inst, *args, **kwargs):
@@ -348,7 +357,8 @@ class SolidFireDriver(san.SanISCSIDriver):
                 remote_info['clusterPairID'] = ep['clusterPairID']
                 break

-        if not remote_pair:
+        if (not remote_pair and
+                remote_info['mvip'] != self.active_cluster['mvip']):
             # NOTE(jdg): create_remote_pairing sets the
             # clusterPairID in remote_info for us
             self._create_remote_pairing(remote_info)
@@ -356,23 +366,51 @@ class SolidFireDriver(san.SanISCSIDriver):
                 LOG.debug("Setting replication_enabled to True.")
                 self.replication_enabled = True

-    def _set_active_cluster_info(self, endpoint=None):
+    def _create_cluster_reference(self, endpoint=None):
+        cluster_ref = {}
+        cluster_ref['endpoint'] = endpoint
         if not endpoint:
-            self.active_cluster_info['endpoint'] = self._build_endpoint_info()
+            cluster_ref['endpoint'] = self._build_endpoint_info()
+
+        cluster_info = (self._issue_api_request(
+            'GetClusterInfo', {}, endpoint=cluster_ref['endpoint'])
+            ['result']['clusterInfo'])
+
+        for k, v in cluster_info.items():
+            cluster_ref[k] = v
+
+        # Add a couple extra things that are handy for us
+        cluster_ref['clusterAPIVersion'] = (
+            self._issue_api_request('GetClusterVersionInfo',
+                                    {}, endpoint=cluster_ref['endpoint'])
+            ['result']['clusterAPIVersion'])
+
+        # FIXME(jdg): This is fine for the default/base cluster, but
+        # if we have a secondary configured, and are using vlans etc
+        # we don't use what's in the config (that's the primary only),
+        # we need to set this from the replication_device config
+        if self.configuration.get('sf_svip', None):
+            cluster_ref['svip'] = (
+                self.configuration.get('sf_svip'))
+        return cluster_ref
+
+    def _set_active_cluster(self, endpoint=None):
+        if not endpoint:
+            self.active_cluster['endpoint'] = self._build_endpoint_info()
         else:
-            self.active_cluster_info['endpoint'] = endpoint
+            self.active_cluster['endpoint'] = endpoint

         for k, v in self._issue_api_request(
                 'GetClusterInfo',
                 {})['result']['clusterInfo'].items():
-            self.active_cluster_info[k] = v
+            self.active_cluster[k] = v

         # Add a couple extra things that are handy for us
-        self.active_cluster_info['clusterAPIVersion'] = (
+        self.active_cluster['clusterAPIVersion'] = (
             self._issue_api_request('GetClusterVersionInfo',
                                     {})['result']['clusterAPIVersion'])
         if self.configuration.get('sf_svip', None):
-            self.active_cluster_info['svip'] = (
+            self.active_cluster['svip'] = (
                 self.configuration.get('sf_svip'))

     def _create_provider_id_string(self,
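For illustration only (field values are placeholders, not real cluster output), the dict `_create_cluster_reference` returns looks roughly like:

    cluster_ref = {
        'endpoint': {'url': 'https://10.0.0.1:443'},  # where API calls are sent
        'mvip': '10.0.0.1',           # from GetClusterInfo
        'svip': '9.9.9.9:6500',       # replaced by sf_svip when configured
        'uuid': 'a720b3c0-...',       # placeholder; used in provider id strings
        'clusterAPIVersion': '8.0',   # from GetClusterVersionInfo
    }

Keeping per-cluster state in a plain dict is what lets the same code path serve the default cluster, a failover target, or any paired cluster.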
@@ -383,7 +421,7 @@ class SolidFireDriver(san.SanISCSIDriver):
         # swap that with the parent volume id
         return "%s %s %s" % (resource_id,
                              account_or_vol_id,
-                             self.active_cluster_info['uuid'])
+                             self.active_cluster['uuid'])

     def _init_snapshot_mappings(self, srefs):
         updates = []
@@ -470,7 +508,7 @@ class SolidFireDriver(san.SanISCSIDriver):
         if params is None:
             params = {}
         if endpoint is None:
-            endpoint = self.active_cluster_info['endpoint']
+            endpoint = self.active_cluster['endpoint']

         payload = {'method': method, 'params': params}
         url = '%s/json-rpc/%s/' % (endpoint['url'], version)
@@ -577,7 +615,7 @@ class SolidFireDriver(san.SanISCSIDriver):
         if endpoint:
             iscsi_portal = endpoint['svip']
         else:
-            iscsi_portal = self.active_cluster_info['svip']
+            iscsi_portal = self.active_cluster['svip']

         if ':' not in iscsi_portal:
             iscsi_portal += ':3260'
@@ -1343,7 +1381,11 @@ class SolidFireDriver(san.SanISCSIDriver):
         type_ref = volume_types.get_volume_type(ctxt, type_id)
         specs = type_ref.get('extra_specs')

-        if specs.get('replication', 'disabled').lower() == 'enabled':
+        # We use the replication_enabled flag for both the trigger in the
+        # driver, as well as capabilities for scheduler. Note we don't
+        # require or check for the additional "replication:True|False"
+        # spec in the type any longer.
+        if specs.get('replication_enabled', "") == "<is> True":
             rep_opts['targets'] = specs.get(
                 'solidfire:replication_targets', self.cluster_pairs[0])
         return rep_opts
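The new spelling follows Cinder's boolean extra-spec convention, where the stored scheduler-visible value is the literal string `<is> True`. A tiny sketch of the predicate with illustrative spec dicts:

    specs = {'replication_enabled': '<is> True'}
    assert specs.get('replication_enabled', "") == "<is> True"

    legacy_specs = {'replication': 'enabled'}  # old spelling, no longer honored
    assert legacy_specs.get('replication_enabled', "") != "<is> True"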
@@ -1824,7 +1866,7 @@ class SolidFireDriver(san.SanISCSIDriver):
         data['replication_enabled'] = self.replication_enabled
         if self.replication_enabled:
             data['replication'] = 'enabled'
-        data['active_cluster_mvip'] = self.active_cluster_info['mvip']
+        data['active_cluster_mvip'] = self.active_cluster['mvip']
         data['reserved_percentage'] = self.configuration.reserved_percentage
         data['QoS_support'] = True

@@ -2058,35 +2100,88 @@ class SolidFireDriver(san.SanISCSIDriver):
         self._issue_api_request('ModifyVolume',
                                 params, version='5.0')

-    def _failover_volume(self, remote_vol, remote):
+    def _failover_volume(self, src_vol, tgt_vol, tgt_cluster):
         """Modify remote volume to R/W mode."""
-        self._issue_api_request(
-            'RemoveVolumePair',
-            {'volumeID': remote_vol['volumeID']},
-            endpoint=remote['endpoint'], version='7.0')
+        # Put the src in tgt mode assuming it's still available
+        # catch the exception if the cluster isn't available and
+        # continue on
+        params = {'volumeID': src_vol['volumeID'],
+                  'access': 'replicationTarget'}
+        try:
+            self._issue_api_request('ModifyVolume', params)
+        except exception.SolidFireAPIException:
+            # FIXME
+            pass

-        params = {'volumeID': remote_vol['volumeID'],
+        # Now call out to the remote and make the tgt our new src
+        params = {'volumeID': tgt_vol['volumeID'],
                   'access': 'readWrite'}
         self._issue_api_request('ModifyVolume', params,
-                                endpoint=remote['endpoint'])
+                                endpoint=tgt_cluster['endpoint'])

     def failover_host(self, context, volumes, secondary_id=None, groups=None):
-        """Failover to replication target."""
+        """Failover to replication target.
+
+        In order to do failback, you MUST specify the original/default cluster
+        using secondary_id option. You can do this simply by specifying:
+        `secondary_id=default`
+        """
+        failback = False
         volume_updates = []
         remote = None
+        secondary_id = secondary_id.lower() if secondary_id else None

+        # FIXME(jdg): There's an awful lot going on in this if/else block
+        # it's pretty simple in terms of what it does, but would be
+        # good to come back and clean it up and make it a bit more
+        # readable/maintainable.
+
+        # There's two cases we have to deal with
+        # 1. Caller specified a backend target to fail to
+        # 2. Caller just wants to failover to anything available
+        # In case `1` we need to check if they specified the default
+        # and want to failback, so make sure we're even failed-over
+        #
+        # In case `2` they didn't specify a target, but if we're failed
+        # over already, can't just grab a target off the list, we might
+        # already be on that target, so check that and try and go back to
+        # whence you came
         if secondary_id:
-            for rc in self.cluster_pairs:
-                if rc['mvip'] == secondary_id:
-                    remote = rc
-                    break
+            if secondary_id == "default" and not self.failed_over:
+                LOG.error("SolidFire driver received failover_host "
+                          "specifying failback to default, the "
+                          "host however is not in `failed_over` "
+                          "state, so can't failback.")
+                raise exception.InvalidReplicationTarget
+            elif secondary_id == "default" and self.failed_over:
+                remote = self.default_cluster
+                failback = True
+                # TODO(jdg): Add a simple check here to make
+                # sure the default is online
+            else:
+                for rc in self.cluster_pairs:
+                    if rc['mvip'] == secondary_id:
+                        remote = rc
+                        break
             if not remote:
                 LOG.error("SolidFire driver received failover_host "
                           "but was unable to find specified replication "
                           "pair with id: %s.", secondary_id)
                 raise exception.InvalidReplicationTarget
         else:
-            remote = self.cluster_pairs[0]
+            # Otherwise, we just grab a target off the list
+            # but beware, we may already be failed over and there
+            # may not be another target left, so recycle back to
+            # the default
+            if self.failed_over:
+                for cp in self.cluster_pairs:
+                    if cp['endpoint'] != self.active_cluster['endpoint']:
+                        remote = cp
+                if not remote:
+                    remote = self.default_cluster
+                    failback = True
+            else:
+                remote = self.cluster_pairs[0]

         if not remote or not self.replication_enabled:
             LOG.error("SolidFire driver received failover_host "
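The target-selection rules above reduce to a small decision table. A hypothetical distillation (not the driver's code; cluster dicts are stand-ins and InvalidReplicationTarget is modeled with ValueError):

    def choose_target(secondary_id, failed_over, default_cluster,
                      active_cluster, cluster_pairs):
        """Return (target_cluster, failback) following the rules above."""
        if secondary_id == 'default':
            if not failed_over:
                raise ValueError("can't failback: host is not failed over")
            return default_cluster, True                  # explicit failback
        if secondary_id:
            for rc in cluster_pairs:
                if rc['mvip'] == secondary_id:
                    return rc, False                      # explicit target
            raise ValueError('no replication pair with id: %s' % secondary_id)
        if failed_over:
            for cp in cluster_pairs:                      # any other target...
                if cp['endpoint'] != active_cluster['endpoint']:
                    return cp, False
            return default_cluster, True                  # ...else recycle to default
        return cluster_pairs[0], False                    # first available pair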
@@ -2097,24 +2192,25 @@ class SolidFireDriver(san.SanISCSIDriver):
                       "on non replicated "
                       "backend."))

-        remote_vols = self._map_sf_volumes(volumes,
+        # Ok, that was annoying; get on with it
+        target_vols = self._map_sf_volumes(volumes,
                                            endpoint=remote['endpoint'])
         primary_vols = self._map_sf_volumes(volumes)
         for v in volumes:
-            remote_vlist = [sfv for sfv in remote_vols
+            target_vlist = [sfv for sfv in target_vols
                             if sfv['cinder_id'] == v['id']]

-            if len(remote_vlist) > 0:
-                remote_vol = remote_vlist[0]
-                self._failover_volume(remote_vol, remote)
+            if len(target_vlist) > 0:
+                target_vol = target_vlist[0]
+                # BOOKMARK This fails on failback using 'default'
+                #
                 primary_vol = [sfv for sfv in primary_vols if
                                sfv['cinder_id'] == v['id']][0]
-                if len(primary_vol['volumePairs']) > 0:
-                    self._issue_api_request(
-                        'RemoveVolumePair',
-                        {'volumeID': primary_vol['volumeID']},
-                        version='7.0')
-                iqn = remote_vol['iqn']
+                self._failover_volume(primary_vol, target_vol, remote)
+
+                # Now we need to update the iqn of the volume to match
+                # the target svip etc
+                iqn = target_vol['iqn']
                 volume_updates.append(
                     {'volume_id': v['id'],
                      'updates': {
@@ -2131,10 +2227,14 @@ class SolidFireDriver(san.SanISCSIDriver):
         # has been pretty much stateless and has allowed customers to run
         # active/active HA c-vol services with SolidFire. The introduction of
         # the active_cluster and failed_over attributes is going to break that
-        # but for now that's going to be the trade off of using replciation
-        self.active_cluster_info = remote
+        # but for now that's going to be the trade off of using replication
+        active_cluster_id = remote['mvip']
+        self.active_cluster = remote
         self.failed_over = True
-        return remote['mvip'], volume_updates, []
+        if failback:
+            active_cluster_id = 'default'
+
+        return active_cluster_id, volume_updates, []

     def freeze_backend(self, context):
         """Freeze backend notification."""
@@ -0,0 +1,7 @@
+---
+features:
+  - |
+    Add ability to call failover-host on a replication
+    enabled SF cluster a second time with host id = default
+    to initiate a failback to the default configured SolidFire
+    Cluster.
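In practice the workflow the note describes would be driven from the Cinder CLI along these lines (host name and target id are illustrative; per the selection logic above, a SolidFire pair is matched by its mvip):

    cinder failover-host cinder@solidfire --backend_id 10.0.0.2
    cinder failover-host cinder@solidfire --backend_id default   # failback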