Implement replicate_volume & failover on SolidFire

This adds the Cheesecake Replication implementation
for the SolidFire driver.

We add the necessary methods for creating a replicated volume and
performing a failover properly that updates the status of volumes
on the failover host.

We also make sure that things like delete, attach etc work in a
failed over scenario.

For SolidFire we are using the remote cluster mvip as the
backend_id, here's an example replication_device entry for
a cinder.conf:
    replication_device =
    backend_id:192.168.139.102,mvip:192.168.139.102,login:admin,password:admin

Note that there's an independent fix from this patch series that needs
to be applied in order for things to work properly:
  https://review.openstack.org/#/c/299723/

The last remaining task is to ensure we detect and deal with a restart
when we're in a failed over state.

Change-Id: I78244da226abce2544d0c3196fccdec50bf5b8e5
This commit is contained in:
John Griffith 2016-03-31 02:53:56 +00:00
parent 70fff3e904
commit 3f5e040e73
3 changed files with 310 additions and 33 deletions

View File

@ -108,7 +108,7 @@ class SolidFireVolumeTestCase(test.TestCase):
'thinProvisioningPercent': 100}}}
return data
elif method is 'GetClusterInfo' and version == '1.0':
elif method is 'GetClusterInfo':
results = {
'result':
{'clusterInfo':
@ -122,7 +122,13 @@ class SolidFireVolumeTestCase(test.TestCase):
return results
elif method is 'GetClusterVersionInfo':
return {'result': {'clusterAPIVersion': '8.0'}}
return {'id': None, 'result': {'softwareVersionInfo':
{'pendingVersion': '8.2.1.4',
'packageName': '',
'currentVersion': '8.2.1.4',
'nodeID': 0, 'startTime': ''},
'clusterVersion': '8.2.1.4',
'clusterAPIVersion': '8.2'}}
elif method is 'AddAccount' and version == '1.0':
return {'result': {'accountID': 25}, 'id': 1}
@ -192,6 +198,8 @@ class SolidFireVolumeTestCase(test.TestCase):
return {'result': {}}
elif method is 'GetClusterVersionInfo':
return {'result': {'clusterAPIVersion': '8.0'}}
elif method is 'StartVolumePairing':
return {'result': {'volumePairingKey': 'fake-pairing-key'}}
else:
# Crap, unimplemented API call in Fake
return None
@ -221,7 +229,7 @@ class SolidFireVolumeTestCase(test.TestCase):
def fake_get_cluster_version_info(self):
return
def fake_get_model_info(self, account, vid):
def fake_get_model_info(self, account, vid, endpoint=None):
return {'fake': 'fake-model'}
@mock.patch.object(solidfire.SolidFireDriver, '_issue_api_request')
@ -849,6 +857,7 @@ class SolidFireVolumeTestCase(test.TestCase):
self.assertEqual('UUID-a720b3c0-d1f0-11e1-9b23-0800200c9a66',
sf_vol_object['name'])
@mock.patch.object(solidfire.SolidFireDriver, '_update_cluster_status')
@mock.patch.object(solidfire.SolidFireDriver, '_issue_api_request')
@mock.patch.object(solidfire.SolidFireDriver, '_get_sfaccount')
@mock.patch.object(solidfire.SolidFireDriver, '_get_sf_volume')
@ -857,7 +866,8 @@ class SolidFireVolumeTestCase(test.TestCase):
_mock_create_image_volume,
_mock_get_sf_volume,
_mock_get_sfaccount,
_mock_issue_api_request):
_mock_issue_api_request,
_mock_update_cluster_status):
fake_sf_vref = {
'status': 'active', 'volumeID': 1,
'attributes': {
@ -867,7 +877,8 @@ class SolidFireVolumeTestCase(test.TestCase):
'image_name': 'fake-image',
'image_created_at': '2014-12-17T00:16:23+00:00'}}}
_mock_issue_api_request.return_value.side_effect = (
_mock_update_cluster_status.return_value = None
_mock_issue_api_request.side_effect = (
self.fake_issue_api_request)
_mock_get_sfaccount.return_value = {'username': 'openstack-vtemplate',
'accountID': 7777}
@ -881,10 +892,10 @@ class SolidFireVolumeTestCase(test.TestCase):
image_service = 'null'
sfv = solidfire.SolidFireDriver(configuration=self.configuration)
_mock_issue_api_request.return_value = {'result': 'ok'}
sfv._verify_image_volume(self.ctxt, image_meta, image_service)
self.assertTrue(_mock_create_image_volume.called)
@mock.patch.object(solidfire.SolidFireDriver, '_update_cluster_status')
@mock.patch.object(solidfire.SolidFireDriver, '_issue_api_request')
@mock.patch.object(solidfire.SolidFireDriver, '_get_sfaccount')
@mock.patch.object(solidfire.SolidFireDriver, '_get_sf_volume')
@ -893,8 +904,11 @@ class SolidFireVolumeTestCase(test.TestCase):
_mock_create_image_volume,
_mock_get_sf_volume,
_mock_get_sfaccount,
_mock_issue_api_request):
_mock_issue_api_request,
_mock_update_cluster_status):
_mock_issue_api_request.side_effect = self.fake_issue_api_request
_mock_update_cluster_status.return_value = None
_mock_get_sfaccount.return_value = {'username': 'openstack-vtemplate',
'accountID': 7777}
_mock_get_sf_volume.return_value =\
@ -913,10 +927,7 @@ class SolidFireVolumeTestCase(test.TestCase):
325355)}
image_service = 'null'
_mock_issue_api_request.return_value.side_effect = (
self.fake_issue_api_request)
sfv = solidfire.SolidFireDriver(configuration=self.configuration)
_mock_issue_api_request.return_value = {'result': 'ok'}
sfv._verify_image_volume(self.ctxt, image_meta, image_service)
self.assertFalse(_mock_create_image_volume.called)
@ -1001,12 +1012,13 @@ class SolidFireVolumeTestCase(test.TestCase):
def test_configured_svip(self):
sfv = solidfire.SolidFireDriver(configuration=self.configuration)
def _fake_get_volumes(account_id):
def _fake_get_volumes(account_id, endpoint=None):
return [{'volumeID': 1,
'iqn': ''}]
def _fake_get_cluster_info():
return {'clusterInfo': {'svip': 1}}
return {'clusterInfo': {'svip': '10.10.10.10',
'mvip': '1.1.1.1'}}
with mock.patch.object(sfv,
'_get_volumes_by_sfaccount',
@ -1116,7 +1128,7 @@ class SolidFireVolumeTestCase(test.TestCase):
}
connector = {'initiator': 'iqn.2012-07.org.fake:01'}
provider_id = testvol['provider_id']
vol_id = int(sfv._parse_provider_id_string(provider_id)[0])
vol_id = int(provider_id.split()[0])
vag_id = 1
with mock.patch.object(sfv,
@ -1190,7 +1202,7 @@ class SolidFireVolumeTestCase(test.TestCase):
'multiattach': False
}
provider_id = testvol['provider_id']
vol_id = int(sfv._parse_provider_id_string(provider_id)[0])
vol_id = int(provider_id.split()[0])
connector = {'initiator': 'iqn.2012-07.org.fake:01'}
vag_id = 1
vags = [{'attributes': {},
@ -1731,3 +1743,49 @@ class SolidFireVolumeTestCase(test.TestCase):
self.fail("Should have thrown Error")
except Exception:
pass
def test_set_rep_by_volume_type(self):
sfv = solidfire.SolidFireDriver(configuration=self.configuration)
sfv.cluster_pairs = [{'cluster_id': 'fake-id', 'cluster_mvip':
'fake-mvip'}]
ctxt = None
type_id = '290edb2a-f5ea-11e5-9ce9-5e5517507c66'
fake_type = {'extra_specs': {'replication': 'enabled'}}
with mock.patch.object(volume_types,
'get_volume_type',
return_value=fake_type):
self.assertEqual('fake-id', sfv._set_rep_by_volume_type(
ctxt,
type_id)['targets']['cluster_id'])
def test_replicate_volume(self):
sfv = solidfire.SolidFireDriver(configuration=self.configuration)
sfv.cluster_pairs = (
[{'uniqueID': 'lu9f', 'endpoint': {'passwd': 'admin', 'port':
443, 'url':
'https://192.168.139.102:443',
'svip': '10.10.8.134', 'mvip':
'192.168.139.102', 'login':
'admin'}, 'name':
'AutoTest2-6AjG-FOR-TEST-ONLY', 'clusterPairID': 33, 'uuid':
'9c499d4b-8fff-48b4-b875-27601d5d9889', 'svip': '10.10.23.2',
'mvipNodeID': 1, 'repCount': 1, 'encryptionAtRestState':
'disabled', 'attributes': {}, 'mvip': '192.168.139.102',
'ensemble': ['10.10.5.130'], 'svipNodeID': 1}])
with mock.patch.object(sfv,
'_issue_api_request',
self.fake_issue_api_request),\
mock.patch.object(sfv,
'_get_sfaccount_by_name',
return_value={'accountID': 1}),\
mock.patch.object(sfv,
'_do_volume_create',
return_value={'provider_id': '1 2 xxxx'}):
self.assertEqual({'provider_id': '1 2 xxxx'},
sfv._replicate_volume(
{'project_id': 1, 'volumeID': 1},
{'attributes': {}},
{'initiatorSecret': 'shhh',
'targetSecret': 'dont-tell'},
{}))

View File

@ -185,6 +185,7 @@ class SolidFireDriver(san.SanISCSIDriver):
self.volume_map = {}
self.cluster_pairs = []
self.replication_enabled = False
self.failed_over = False
self.target_driver = SolidFireISCSI(solidfire_driver=self,
configuration=self.configuration)
self._set_active_cluster_info()
@ -221,6 +222,7 @@ class SolidFireDriver(san.SanISCSIDriver):
with excutils.save_and_reraise_exception():
LOG.error(_LE('Cluster pairing failed: %s'), ex.msg)
LOG.debug(('Initialized Cluster pair with ID: %s'), pair_id)
remote_device['clusterPairID'] = pair_id
return pair_id
def _get_remote_cluster_info(self, remote_endpoint):
@ -244,15 +246,22 @@ class SolidFireDriver(san.SanISCSIDriver):
remote_endpoint = self._build_endpoint_info(**rd)
remote_info = self._get_remote_cluster_info(remote_endpoint)
remote_info['endpoint'] = remote_endpoint
if not remote_info['endpoint']['svip']:
remote_info['endpoint']['svip'] = remote_info['svip'] + ':3260'
for ep in existing_pairs:
if rd['backend_id'] == ep['mvip']:
remote_pair = ep
LOG.debug("Found remote pair: %s", remote_pair)
remote_info['clusterPairID'] = ep['clusterPairID']
break
if not remote_pair:
# NOTE(jdg): create_remote_pairing sets the
# clusterPairID in remote_info for us
self._create_remote_pairing(remote_info)
self.cluster_pairs.append(remote_info)
LOG.debug("Setting replication_enabled to True.")
self.replication_enabled = True
def _set_active_cluster_info(self):
@ -270,9 +279,6 @@ class SolidFireDriver(san.SanISCSIDriver):
self.active_cluster_info['svip'] = (
self.configuration.get('sf_svip'))
def _parse_provider_id_string(self, id_string):
return tuple(id_string.split())
def _create_provider_id_string(self,
resource_id,
account_or_vol_id):
@ -359,10 +365,9 @@ class SolidFireDriver(san.SanISCSIDriver):
kwargs.get('port', self.configuration.sf_api_port))
endpoint['url'] = 'https://%s:%s' % (endpoint['mvip'],
endpoint['port'])
endpoint['svip'] = kwargs.get('svip', self.configuration.sf_svip)
if not endpoint.get('mvip', None) and kwargs.get('backend_id', None):
endpoint['mvip'] = kwargs.get('backend_id')
# TODO(jdg): consider a call to GetAPI and setting version
return endpoint
@retry(retry_exc_tuple, tries=6)
@ -396,18 +401,22 @@ class SolidFireDriver(san.SanISCSIDriver):
return response
def _get_volumes_by_sfaccount(self, account_id):
def _get_volumes_by_sfaccount(self, account_id, endpoint=None):
"""Get all volumes on cluster for specified account."""
params = {'accountID': account_id}
return self._issue_api_request(
'ListVolumesForAccount', params)['result']['volumes']
'ListVolumesForAccount',
params,
endpoint=endpoint)['result']['volumes']
def _get_sfaccount_by_name(self, sf_account_name):
def _get_sfaccount_by_name(self, sf_account_name, endpoint=None):
"""Get SolidFire account object by name."""
sfaccount = None
params = {'username': sf_account_name}
try:
data = self._issue_api_request('GetAccountByName', params)
data = self._issue_api_request('GetAccountByName',
params,
endpoint=endpoint)
if 'result' in data and 'account' in data['result']:
LOG.debug('Found solidfire account: %s', sf_account_name)
sfaccount = data['result']['account']
@ -463,18 +472,23 @@ class SolidFireDriver(san.SanISCSIDriver):
char_set = string.ascii_uppercase + string.digits
return ''.join(random.sample(char_set, length))
def _get_model_info(self, sfaccount, sf_volume_id):
def _get_model_info(self, sfaccount, sf_volume_id, endpoint=None):
"""Gets the connection info for specified account and volume."""
iscsi_portal = self.active_cluster_info['svip']
if endpoint:
iscsi_portal = endpoint['svip']
else:
iscsi_portal = self.active_cluster_info['svip']
if ':' not in iscsi_portal:
iscsi_portal += ':3260'
chap_secret = sfaccount['targetSecret']
found_volume = False
iteration_count = 0
while not found_volume and iteration_count < 600:
volume_list = self._get_volumes_by_sfaccount(
sfaccount['accountID'])
sfaccount['accountID'], endpoint=endpoint)
iqn = None
for v in volume_list:
if v['volumeID'] == sf_volume_id:
@ -590,10 +604,11 @@ class SolidFireDriver(san.SanISCSIDriver):
params['attributes'] = attributes
return self._issue_api_request('ModifyVolume', params)
def _do_volume_create(self, sf_account, params):
def _do_volume_create(self, sf_account, params, endpoint=None):
params['accountID'] = sf_account['accountID']
sf_volid = self._issue_api_request(
'CreateVolume', params)['result']['volumeID']
return self._get_model_info(sf_account, sf_volid)
'CreateVolume', params, endpoint=endpoint)['result']['volumeID']
return self._get_model_info(sf_account, sf_volid, endpoint=endpoint)
def _do_snapshot_create(self, params):
model_update = {}
@ -864,7 +879,6 @@ class SolidFireDriver(san.SanISCSIDriver):
# Check availability for creates
sf_account = self._get_account_create_availability(sf_accounts)
if not sf_account:
# TODO(jdg): We're not doing tertiaries, so fail.
msg = _('Volumes/account exceeded on both primary and '
'secondary SolidFire accounts.')
raise exception.SolidFireDriverException(msg)
@ -1139,7 +1153,92 @@ class SolidFireDriver(san.SanISCSIDriver):
params['name'] = vname
params['attributes']['migration_uuid'] = volume['id']
params['attributes']['uuid'] = v
return self._do_volume_create(sf_account, params)
model_update = self._do_volume_create(sf_account, params)
rep_settings = self._retrieve_replication_settings(volume)
if self.replication_enabled and rep_settings:
volume['volumeID'] = int(model_update['provider_id'].split()[0])
self._replicate_volume(volume, params,
sf_account, rep_settings)
return model_update
def _retrieve_replication_settings(self, volume):
rep_data = {}
ctxt = context.get_admin_context()
type_id = volume.get('volume_type_id', None)
if type_id is not None:
rep_data = self._set_rep_by_volume_type(ctxt, type_id)
return rep_data
def _set_rep_by_volume_type(self, ctxt, type_id):
rep_opts = {}
type_ref = volume_types.get_volume_type(ctxt, type_id)
specs = type_ref.get('extra_specs')
if specs.get('replication', 'disabled').lower() == 'enabled':
rep_opts['targets'] = specs.get(
'solidfire:replication_targets', self.cluster_pairs[0])
return rep_opts
def _replicate_volume(self, volume, src_params,
parent_sfaccount, rep_info):
params = {}
# TODO(jdg): Right now we just go to first pair,
# need to add parsing of rep_info eventually
# in other words "rep_info" is not used yet!
tgt_endpoint = self.cluster_pairs[0]['endpoint']
LOG.debug("Replicating volume on remote cluster: %s", tgt_endpoint)
params['attributes'] = src_params['attributes']
params['username'] = self._get_sf_account_name(volume['project_id'])
try:
params['initiatorSecret'] = parent_sfaccount['initiatorSecret']
params['targetSecret'] = parent_sfaccount['targetSecret']
self._issue_api_request(
'AddAccount',
params,
endpoint=tgt_endpoint)['result']['accountID']
except exception.SolidFireAPIException as ex:
if 'xDuplicateUsername' not in ex.msg:
raise
remote_account = (
self._get_sfaccount_by_name(params['username'],
endpoint=tgt_endpoint))
# Create the volume on the remote cluster w/same params as original
params = src_params
params['accountID'] = remote_account['accountID']
LOG.debug("Create remote volume on: %(endpoint)s with account: "
"%(account)s",
{'endpoint': tgt_endpoint['url'], 'account': remote_account})
model_update = self._do_volume_create(
remote_account, params, endpoint=tgt_endpoint)
tgt_sfid = int(model_update['provider_id'].split()[0])
params = {'volumeID': tgt_sfid, 'access': 'replicationTarget'}
self._issue_api_request('ModifyVolume',
params,
'8.0',
endpoint=tgt_endpoint)
# Enable volume pairing
LOG.debug("Start volume pairing on volume ID: %s",
volume['volumeID'])
params = {'volumeID': volume['volumeID']}
rep_key = self._issue_api_request('StartVolumePairing',
params,
'8.0')['result']['volumePairingKey']
params = {'volumeID': tgt_sfid,
'volumePairingKey': rep_key}
LOG.debug("Issue CompleteVolumePairing request on remote: "
"%(endpoint)s, %(parameters)s",
{'endpoint': tgt_endpoint['url'], 'parameters': params})
self._issue_api_request('CompleteVolumePairing',
params,
'8.0',
endpoint=tgt_endpoint)
LOG.debug("Completed volume pairing.")
return model_update
def create_cloned_volume(self, volume, src_vref):
"""Create a clone of an existing volume."""
@ -1174,6 +1273,19 @@ class SolidFireDriver(san.SanISCSIDriver):
break
if sf_vol is not None:
for vp in sf_vol.get('volumePairs', []):
LOG.debug("Deleting paired volume on remote cluster...")
pair_id = vp['clusterPairID']
for cluster in self.cluster_pairs:
if cluster['clusterPairID'] == pair_id:
params = {'volumeID': vp['remoteVolumeID']}
LOG.debug("Issue Delete request on cluster: "
"%(remote)s with params: %(parameters)s",
{'remote': cluster['endpoint']['url'],
'parameters': params})
self._issue_api_request('DeleteVolume', params,
endpoint=cluster['endpoint'])
params = {'volumeID': sf_vol['volumeID']}
self._issue_api_request('DeleteVolume', params)
if volume.get('multiattach'):
@ -1319,6 +1431,24 @@ class SolidFireDriver(san.SanISCSIDriver):
model['status'] = 'available'
return model
def _map_sf_volumes(self, cinder_volumes, endpoint=None):
"""Get a list of SolidFire volumes.
Creates a list of SolidFire volumes based
on matching a list of cinder volume ID's,
also adds an 'cinder_id' key to match cinder.
"""
vols = self._issue_api_request(
'ListActiveVolumes', {},
endpoint=endpoint)['result']['volumes']
vlist = (
[sfvol for sfvol in vols for cv in cinder_volumes if cv['id'] in
sfvol['name']])
for v in vlist:
v['cinder_id'] = v['name'].split(
self.configuration.sf_volume_prefix)[1]
return vlist
# Required consistency group functions
def create_consistencygroup(self, ctxt, group):
# SolidFire does not have a viable means for storing consistency group
@ -1452,9 +1582,12 @@ class SolidFireDriver(san.SanISCSIDriver):
data["driver_version"] = self.VERSION
data["storage_protocol"] = 'iSCSI'
data['consistencygroup_support'] = True
# TODO(jdg): should we have a "replication_status" that includes
# enabled, disabled, failed-over, error ?
data['replication_enabled'] = self.replication_enabled
if self.replication_enabled:
data['replication'] = 'enabled'
data['active_cluster_mvip'] = self.active_cluster_info['mvip']
data['total_capacity_gb'] = (
float(results['maxProvisionedSpace'] / units.Gi))
@ -1676,6 +1809,89 @@ class SolidFireDriver(san.SanISCSIDriver):
self._issue_api_request('ModifyVolume',
params, version='5.0')
def _failover_volume(self, remote_vol, remote):
"""Modify remote volume to R/W mode."""
self._issue_api_request(
'RemoveVolumePair',
{'volumeID': remote_vol['volumeID']},
endpoint=remote['endpoint'], version='7.0')
params = {'volumeID': remote_vol['volumeID'],
'access': 'readWrite'}
self._issue_api_request('ModifyVolume', params,
endpoint=remote['endpoint'])
def failover_host(self, context, volumes, secondary_id=None):
"""Failover to replication target."""
volume_updates = []
remote = None
if secondary_id:
for rc in self.cluster_pairs:
if rc['mvip'] == secondary_id:
remote = rc
break
if not remote:
LOG.error(_LE("SolidFire driver received failover_host "
"but was unable to find specified replication "
"pair with id: %s."), secondary_id)
raise exception.InvalidReplicationTarget
else:
remote = self.cluster_pairs[0]
if not remote or not self.replication_enabled:
LOG.error(_LE("SolidFire driver received failover_host "
"request, however replication is NOT "
"enabled, or there are no available "
"targets to fail-over to."))
raise exception.UnableToFailOver
remote_vols = self._map_sf_volumes(volumes,
endpoint=remote['endpoint'])
primary_vols = self._map_sf_volumes(volumes)
for v in volumes:
remote_vlist = filter(lambda sfv: sfv['cinder_id'] == v['id'],
remote_vols)
if len(remote_vlist) > 0:
remote_vol = remote_vlist[0]
self._failover_volume(remote_vol, remote)
primary_vol = filter(lambda sfv: sfv['cinder_id'] == v['id'],
primary_vols)[0]
if len(primary_vol['volumePairs']) > 0:
self._issue_api_request(
'RemoveVolumePair',
{'volumeID': primary_vol['volumeID']},
version='7.0')
iqn = remote_vol['iqn']
volume_updates.append(
{'volume_id': v['id'],
'updates': {
'provider_location': ('%s %s %s' %
(remote['endpoint']['svip'],
iqn,
0)),
'replication_status': 'failed-over'}})
else:
volume_updates.append({'volume_id': v['id'],
'updates': {'status': 'error', }})
# FIXME(jdg): This introduces a problem for us, up until now our driver
# has been pretty much stateless and has allowed customers to run
# active/active HA c-vol services with SolidFire. The introduction of
# the active_cluster and failed_over attributes is going to break that
# but for now that's going to be the trade off of using replciation
self.active_cluster_info = remote
self.failed_over = True
return remote['mvip'], volume_updates
def freeze_backend(self, context):
"""Freeze backend notification."""
pass
def thaw_backend(self, context):
"""Thaw backend notification."""
pass
class SolidFireISCSI(iscsi_driver.SanISCSITarget):
def __init__(self, *args, **kwargs):
@ -1725,7 +1941,7 @@ class SolidFireISCSI(iscsi_driver.SanISCSITarget):
if self.configuration.sf_enable_vag:
iqn = connector['initiator']
provider_id = volume['provider_id']
vol_id = int(self._parse_provider_id_string(provider_id)[0])
vol_id = int(provider_id.split()[0])
# safe_create_vag may opt to reuse vs create a vag, so we need to
# add our vol_id.
@ -1746,7 +1962,7 @@ class SolidFireISCSI(iscsi_driver.SanISCSITarget):
iqn = properties['initiator']
vag = self._get_vags_by_name(iqn)
provider_id = volume['provider_id']
vol_id = int(self._parse_provider_id_string(provider_id)[0])
vol_id = int(provider_id.split()[0])
if vag and not volume['multiattach']:
# Multiattach causes problems with removing volumes from VAGs.

View File

@ -0,0 +1,3 @@
---
features:
- Added v2.1 replication support to SolidFire driver.