[Pure Storage] Add support for 3-site, trisync, replication.

Add new parameters `pure_trisync_enabled` and
`pure_trisync_pg_name`.

When these parameters are used in conjunction with a
volume type where `replication_type` is `<in> trisync`, the
driver will create a volume that is simultaneously replicated to
two target arrays, one synchronously and the other asynchronously.

Two `replication_device` entries must be provided, one of type
sync and one of type async.

Also adds the ability to retype a volume between `sync` and
`trisync` replication types.

Consistency Groups are also supported for `trisync` volume
types, as well as cloning `trisync` CGs.

These changes have been tested in-house by Pure and confirmed
to work as expected in the master branch for 2023.1.

Implements: blueprint pure-trisync
Change-Id: Idecb1c0421ece87f59818a65f15fcba1f49d940a
This commit is contained in:
Simon Dodsley 2022-10-28 14:26:46 -04:00
parent 27bb0b01d6
commit 81c919bb05
4 changed files with 338 additions and 26 deletions

View File

@ -1671,6 +1671,16 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
id=fake.GROUP_ID,
expected_name=("cinder-pod::consisgroup-%s-cinder" % fake.GROUP_ID)
),
dict(
repl_types=['trisync'],
id=fake.GROUP_ID,
expected_name=("cinder-pod::consisgroup-%s-cinder" % fake.GROUP_ID)
),
dict(
repl_types=[None, 'trisync'],
id=fake.GROUP_ID,
expected_name=("cinder-pod::consisgroup-%s-cinder" % fake.GROUP_ID)
),
dict(
repl_types=['sync', 'async'],
id=fake.GROUP_ID,
@ -1681,6 +1691,16 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
id=fake.GROUP_ID,
expected_name=("cinder-pod::consisgroup-%s-cinder" % fake.GROUP_ID)
),
dict(
repl_types=['trisync', 'sync', 'async'],
id=fake.GROUP_ID,
expected_name=("cinder-pod::consisgroup-%s-cinder" % fake.GROUP_ID)
),
dict(
repl_types=[None, 'trisync', 'sync', 'async'],
id=fake.GROUP_ID,
expected_name=("cinder-pod::consisgroup-%s-cinder" % fake.GROUP_ID)
),
)
@ddt.unpack
def test_get_pgroup_name(self, repl_types, id, expected_name):
@ -2616,6 +2636,21 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
expected_add_to_group=False,
expected_remove_from_pgroup=False,
),
# Turn on trisync rep
dict(
current_spec={
'replication_enabled': '<is> false',
},
new_spec={
'replication_type': '<in> trisync',
'replication_enabled': '<is> true',
},
expected_model_update=None,
# cannot retype via fast path to/from sync rep
expected_did_retype=False,
expected_add_to_group=False,
expected_remove_from_pgroup=False,
),
# Turn off sync rep
dict(
current_spec={
@ -2632,6 +2667,22 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
expected_add_to_group=False,
expected_remove_from_pgroup=False,
),
# Turn off trisync rep
dict(
current_spec={
'replication_type': '<in> trisync',
'replication_enabled': '<is> true',
},
new_spec={
'replication_type': '<in> trisync',
'replication_enabled': '<is> false',
},
expected_model_update=None,
# cannot retype via fast path to/from sync rep
expected_did_retype=False,
expected_add_to_group=False,
expected_remove_from_pgroup=False,
),
# Change from async to sync rep
dict(
current_spec={
@ -2648,6 +2699,22 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
expected_add_to_group=False,
expected_remove_from_pgroup=False,
),
# Change from async to trisync rep
dict(
current_spec={
'replication_type': '<in> async',
'replication_enabled': '<is> true',
},
new_spec={
'replication_type': '<in> trisync',
'replication_enabled': '<is> true',
},
expected_model_update=None,
# cannot retype via fast path to/from sync rep
expected_did_retype=False,
expected_add_to_group=False,
expected_remove_from_pgroup=False,
),
# Change from sync to async rep
dict(
current_spec={
@ -2664,6 +2731,52 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
expected_add_to_group=False,
expected_remove_from_pgroup=False,
),
# Change from trisync to async rep
dict(
current_spec={
'replication_type': '<in> trisync',
'replication_enabled': '<is> true',
},
new_spec={
'replication_type': '<in> async',
'replication_enabled': '<is> true',
},
expected_model_update=None,
# cannot retype via fast path to/from trisync rep
expected_did_retype=False,
expected_add_to_group=False,
expected_remove_from_pgroup=False,
),
# Change from trisync to sync rep
dict(
current_spec={
'replication_type': '<in> trisync',
'replication_enabled': '<is> true',
},
new_spec={
'replication_type': '<in> sync',
'replication_enabled': '<is> true',
},
expected_model_update=None,
expected_did_retype=True,
expected_add_to_group=False,
expected_remove_from_pgroup=True,
),
# Change from sync to trisync rep
dict(
current_spec={
'replication_type': '<in> sync',
'replication_enabled': '<is> true',
},
new_spec={
'replication_type': '<in> trisync',
'replication_enabled': '<is> true',
},
expected_model_update=None,
expected_did_retype=True,
expected_add_to_group=True,
expected_remove_from_pgroup=False,
),
)
@ddt.unpack
def test_retype_replication(self,
@ -2691,15 +2804,17 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
self.assertEqual(expected_did_retype, did_retype)
self.assertEqual(expected_model_update, model_update)
if expected_add_to_group:
self.array.set_pgroup.assert_called_once_with(
self.driver._replication_pg_name,
addvollist=[vol_name]
)
if "trisync" not in new_type.extra_specs["replication_type"]:
self.array.set_pgroup.assert_called_once_with(
self.driver._replication_pg_name,
addvollist=[vol_name]
)
if expected_remove_from_pgroup:
self.array.set_pgroup.assert_called_once_with(
self.driver._replication_pg_name,
remvollist=[vol_name]
)
if "trisync" not in current_spec["replication_type"]:
self.array.set_pgroup.assert_called_once_with(
self.driver._replication_pg_name,
remvollist=[vol_name]
)
@ddt.data(
dict(
@ -2716,6 +2831,13 @@ class PureBaseVolumeDriverTestCase(PureBaseSharedDriverTestCase):
},
expected_repl_type='sync'
),
dict(
specs={
'replication_type': '<in> trisync',
'replication_enabled': '<is> true',
},
expected_repl_type='trisync'
),
dict(
specs={
'replication_type': '<in> async',

View File

@ -83,6 +83,10 @@ PURE_OPTS = [
cfg.StrOpt("pure_replication_pg_name", default="cinder-group",
help="Pure Protection Group name to use for async replication "
"(will be created if it does not exist)."),
cfg.StrOpt("pure_trisync_pg_name", default="cinder-trisync",
help="Pure Protection Group name to use for trisync "
"replication leg inside the sync replication pod "
"(will be created if it does not exist)."),
cfg.StrOpt("pure_replication_pod_name", default="cinder-pod",
help="Pure Pod name to use for sync replication "
"(will be created if it does not exist)."),
@ -117,8 +121,13 @@ PURE_OPTS = [
"deletion in Cinder. Data will NOT be recoverable after "
"a delete with this set to True! When disabled, volumes "
"and snapshots will go into pending eradication state "
"and can be recovered."
)
"and can be recovered."),
cfg.BoolOpt("pure_trisync_enabled",
default=False,
help="When enabled and two replication devices are provided, "
"one each of types sync and async, this will enable "
"the ability to create a volume that is sync replicated "
"to one array and async replicated to a separate array.")
]
CONF = cfg.CONF
@ -129,7 +138,12 @@ GENERATED_NAME = re.compile(r".*-[a-f0-9]{32}-cinder$")
REPLICATION_TYPE_SYNC = "sync"
REPLICATION_TYPE_ASYNC = "async"
REPLICATION_TYPES = [REPLICATION_TYPE_SYNC, REPLICATION_TYPE_ASYNC]
REPLICATION_TYPE_TRISYNC = "trisync"
REPLICATION_TYPES = [
REPLICATION_TYPE_SYNC,
REPLICATION_TYPE_ASYNC,
REPLICATION_TYPE_TRISYNC
]
CHAP_SECRET_KEY = "PURE_TARGET_CHAP_SECRET"
@ -224,7 +238,9 @@ class PureBaseVolumeDriver(san.SanDriver):
self._replication_target_arrays = []
self._active_cluster_target_arrays = []
self._uniform_active_cluster_target_arrays = []
self._trisync_pg_name = None
self._replication_pg_name = None
self._trisync_name = None
self._replication_pod_name = None
self._replication_interval = None
self._replication_retention_short_term = None
@ -233,6 +249,7 @@ class PureBaseVolumeDriver(san.SanDriver):
self._async_replication_retention_policy = None
self._is_replication_enabled = False
self._is_active_cluster_enabled = False
self._is_trisync_enabled = False
self._active_backend_id = kwargs.get('active_backend_id', None)
self._failed_over_primary_array = None
self._user_agent = '%(base)s %(class)s/%(version)s (%(platform)s)' % {
@ -248,10 +265,13 @@ class PureBaseVolumeDriver(san.SanDriver):
'san_ip', 'driver_ssl_cert_verify', 'driver_ssl_cert_path',
'use_chap_auth', 'replication_device', 'reserved_percentage',
'max_over_subscription_ratio', 'pure_nvme_transport',
'pure_nvme_cidr_list', 'pure_nvme_cidr')
'pure_nvme_cidr_list', 'pure_nvme_cidr',
'pure_trisync_enabled', 'pure_trisync_pg_name')
return PURE_OPTS + additional_opts
def parse_replication_configs(self):
self._trisync_pg_name = (
self.configuration.pure_trisync_pg_name)
self._replication_pg_name = (
self.configuration.pure_replication_pg_name)
self._replication_pod_name = (
@ -394,6 +414,12 @@ class PureBaseVolumeDriver(san.SanDriver):
self.do_setup_replication()
if self.configuration.pure_trisync_enabled:
# If trisync is enabled check that we have only 1 sync and 1 async
# replication device set up and that the async target is not the
# same as any of the sync targets.
self.do_setup_trisync()
# If we have failed over at some point we need to adjust our current
# array based on the one that we have failed over to
if (self._active_backend_id is not None and
@ -403,6 +429,70 @@ class PureBaseVolumeDriver(san.SanDriver):
self._swap_replication_state(self._array, secondary_array)
break
def do_setup_trisync(self):
    """Validate the trisync configuration and set up its protection group.

    Trisync requires exactly two ``replication_device`` entries: one of
    type sync and one of type async, with different API tokens. A device
    without an explicit ``type`` is treated as async. If the configuration
    is not usable an error is logged and the method returns without
    enabling trisync; no exception is raised for that case.

    On success this sets ``self._trisync_name`` to
    ``<replication pod name>::<trisync pgroup name>``, sets
    ``self._is_trisync_enabled`` to True and calls
    ``self._setup_replicated_pgroups`` for the current array with the
    async target array(s).
    """
    replication_devices = self.configuration.safe_get(
        'replication_device')
    if not replication_devices or len(replication_devices) != 2:
        LOG.error("Unable to configure TriSync Replication. Incorrect "
                  "number of replication devices enabled. "
                  "Only 2 are supported.")
        return

    repl_types = [device.get("type", REPLICATION_TYPE_ASYNC)
                  for device in replication_devices]
    api_tokens = [device["api_token"] for device in replication_devices]

    # Merely requiring the two types to differ would accept pairs such as
    # sync + trisync and enable trisync without any async target, so
    # insist on exactly one sync and one async device.
    has_sync_and_async = set(repl_types) == {REPLICATION_TYPE_SYNC,
                                             REPLICATION_TYPE_ASYNC}
    if not has_sync_and_async or api_tokens[0] == api_tokens[1]:
        LOG.error("Replication devices provided must be one each "
                  "of sync and async and targets must be different "
                  "to enable TriSync Replication.")
        return

    async_targets = []
    for device, repl_type in zip(replication_devices, repl_types):
        if repl_type != REPLICATION_TYPE_ASYNC:
            continue
        target_array = self._get_flasharray(
            device["san_ip"],
            device["api_token"],
            verify_https=strutils.bool_from_string(
                device.get("ssl_cert_verify", False)),
            ssl_cert_path=device.get("ssl_cert_path", None)
        )
        # Record the remote array's name on the client object.
        target_array.array_name = target_array.get()["array_name"]
        async_targets.append(target_array)

    # The trisync protection group is addressed as "<pod>::<pgroup>".
    self._trisync_name = (self._replication_pod_name +
                          "::" +
                          self._trisync_pg_name)
    self._is_trisync_enabled = True
    self._setup_replicated_pgroups(
        self._get_current_array(),
        async_targets,
        self._trisync_name,
        self._replication_interval,
        self._async_replication_retention_policy
    )
def do_setup_replication(self):
replication_devices = self.configuration.safe_get(
'replication_device')
@ -544,10 +634,13 @@ class PureBaseVolumeDriver(san.SanDriver):
# it wont be set in the cinder DB until we return from create_volume
volume.provider_id = purity_vol_name
async_enabled = False
trisync_enabled = False
try:
self._add_to_group_if_needed(volume, purity_vol_name)
async_enabled = self._enable_async_replication_if_needed(
array, volume)
trisync_enabled = self._enable_trisync_replication_if_needed(
array, volume)
except purestorage.PureError as err:
with excutils.save_and_reraise_exception():
LOG.error("Failed to add volume %s to pgroup, removing volume",
@ -556,7 +649,8 @@ class PureBaseVolumeDriver(san.SanDriver):
array.eradicate_volume(purity_vol_name)
repl_status = fields.ReplicationStatus.DISABLED
if self._is_vol_in_pod(purity_vol_name) or async_enabled:
if (self._is_vol_in_pod(purity_vol_name) or
(async_enabled or trisync_enabled)):
repl_status = fields.ReplicationStatus.ENABLED
if not volume.metadata:
@ -586,6 +680,44 @@ class PureBaseVolumeDriver(san.SanDriver):
return True
return False
def _enable_trisync_replication_if_needed(self, array, volume):
repl_type = self._get_replication_type_from_vol_type(
volume.volume_type)
if (self.configuration.pure_trisync_enabled and
repl_type == REPLICATION_TYPE_TRISYNC):
self._enable_trisync_replication(array, volume)
return True
return False
def _enable_trisync_replication(self, array, volume):
"""Add volume to sync-replicated protection group"""
try:
array.set_pgroup(self._trisync_name,
addvollist=[self._get_vol_name(volume)])
except purestorage.PureHTTPError as err:
with excutils.save_and_reraise_exception() as ctxt:
if (err.code == 400 and
ERR_MSG_ALREADY_BELONGS in err.text):
# Happens if the volume already added to PG.
ctxt.reraise = False
LOG.warning("Adding Volume to sync-replicated "
"Protection Group failed with message: %s",
err.text)
def _disable_trisync_replication(self, array, volume):
"""Remove volume from sync-replicated protection group"""
try:
array.set_pgroup(self._trisync_name,
remvollist=[self._get_vol_name(volume)])
except purestorage.PureHTTPError as err:
with excutils.save_and_reraise_exception() as ctxt:
if (err.code == 400 and
ERR_MSG_NOT_EXIST in err.text):
ctxt.reraise = False
LOG.warning("Removing Volume from sync-replicated "
"Protection Group failed with message: %s",
err.text)
def _enable_async_replication(self, array, volume):
"""Add volume to replicated protection group."""
try:
@ -924,6 +1056,8 @@ class PureBaseVolumeDriver(san.SanDriver):
repl_types = [REPLICATION_TYPE_ASYNC]
if self._is_active_cluster_enabled:
repl_types.append(REPLICATION_TYPE_SYNC)
if self._is_trisync_enabled:
repl_types.append(REPLICATION_TYPE_TRISYNC)
data["replication_type"] = repl_types
data["replication_count"] = len(self._replication_target_arrays)
data["replication_targets"] = [array.backend_id for array
@ -1028,6 +1162,14 @@ class PureBaseVolumeDriver(san.SanDriver):
group,
cloned_vol_name
)
repl_type = self._get_replication_type_from_vol_type(
source_vol.volume_type)
if (self.configuration.pure_trisync_enabled and
repl_type == REPLICATION_TYPE_TRISYNC):
self._enable_trisync_replication(current_array, cloned_vol)
LOG.info('Trisync replication set for new cloned '
'volume %s', cloned_vol_name)
finally:
self._delete_pgsnapshot(tmp_pgsnap_name)
return vol_models
@ -1652,6 +1794,8 @@ class PureBaseVolumeDriver(san.SanDriver):
return REPLICATION_TYPE_ASYNC
elif replication_type_spec == "<in> sync":
return REPLICATION_TYPE_SYNC
elif replication_type_spec == "<in> trisync":
return REPLICATION_TYPE_TRISYNC
else:
# if no type was specified but replication is enabled assume
# that async replication is enabled
@ -1719,7 +1863,7 @@ class PureBaseVolumeDriver(san.SanDriver):
repl_type = self._get_replication_type_from_vol_type(
volume.volume_type)
if repl_type == REPLICATION_TYPE_SYNC:
if repl_type in [REPLICATION_TYPE_SYNC, REPLICATION_TYPE_TRISYNC]:
base_name = self._replication_pod_name + "::" + base_name
return base_name + "-cinder"
@ -1747,7 +1891,10 @@ class PureBaseVolumeDriver(san.SanDriver):
# if so, we need to use a group name accounting for the ActiveCluster
# pod.
base_name = ""
if REPLICATION_TYPE_SYNC in self._group_potential_repl_types(pgroup):
if ((REPLICATION_TYPE_SYNC in
self._group_potential_repl_types(pgroup)) or
(REPLICATION_TYPE_TRISYNC in
self._group_potential_repl_types(pgroup))):
base_name = self._replication_pod_name + "::"
return "%(base)sconsisgroup-%(id)s-cinder" % {
@ -1780,6 +1927,8 @@ class PureBaseVolumeDriver(san.SanDriver):
@staticmethod
def _get_pgroup_vol_snap_name(pg_name, pgsnap_suffix, volume_name):
if "::" in volume_name:
volume_name = volume_name.split("::")[1]
return "%(pgroup_name)s.%(pgsnap_suffix)s.%(volume_name)s" % {
'pgroup_name': pg_name,
'pgsnap_suffix': pgsnap_suffix,
@ -1794,10 +1943,12 @@ class PureBaseVolumeDriver(san.SanDriver):
group_snap = snapshot.group_snapshot
elif snapshot.cgsnapshot:
group_snap = snapshot.cgsnapshot
volume_name = self._get_vol_name(snapshot.volume)
if "::" in volume_name:
volume_name = volume_name.split("::")[1]
pg_vol_snap_name = "%(group_snap)s.%(volume_name)s" % {
'group_snap': self._get_pgroup_snap_name(group_snap),
'volume_name': self._get_vol_name(snapshot.volume)
'volume_name': volume_name
}
return pg_vol_snap_name
@ -1887,7 +2038,8 @@ class PureBaseVolumeDriver(san.SanDriver):
model_update = {
"replication_status": fields.ReplicationStatus.DISABLED
}
elif prev_repl_type == REPLICATION_TYPE_SYNC:
elif prev_repl_type in [REPLICATION_TYPE_SYNC,
REPLICATION_TYPE_TRISYNC]:
# We can't pull a volume out of a stretched pod, indicate
# to the volume manager that we need to use a migration instead
return False, None
@ -1899,16 +2051,39 @@ class PureBaseVolumeDriver(san.SanDriver):
model_update = {
"replication_status": fields.ReplicationStatus.ENABLED
}
elif new_repl_type == REPLICATION_TYPE_SYNC:
elif new_repl_type in [REPLICATION_TYPE_SYNC,
REPLICATION_TYPE_TRISYNC]:
# We can't add a volume to a stretched pod, they must be
# created in one, indicate to the volume manager that it
# should do a migration.
return False, None
elif (previous_vol_replicated and new_vol_replicated
and (prev_repl_type != new_repl_type)):
# We can't move a volume in or out of a pod, indicate to the
# manager that it should do a migration for this retype
return False, None
elif previous_vol_replicated and new_vol_replicated:
if prev_repl_type == REPLICATION_TYPE_ASYNC:
if new_repl_type in [REPLICATION_TYPE_SYNC,
REPLICATION_TYPE_TRISYNC]:
# We can't add a volume to a stretched pod, they must be
# created in one, indicate to the volume manager that it
# should do a migration.
return False, None
if prev_repl_type == REPLICATION_TYPE_SYNC:
if new_repl_type == REPLICATION_TYPE_ASYNC:
# We can't move a volume in or out of a pod, indicate to
# the manager that it should do a migration for this retype
return False, None
elif new_repl_type == REPLICATION_TYPE_TRISYNC:
# Add to trisync protection group
self._enable_trisync_replication(self._get_current_array(),
volume)
if prev_repl_type == REPLICATION_TYPE_TRISYNC:
if new_repl_type == REPLICATION_TYPE_ASYNC:
# We can't move a volume in or out of a pod, indicate to
# the manager that it should do a migration for this retype
return False, None
elif new_repl_type == REPLICATION_TYPE_SYNC:
# Remove from trisync protection group
self._disable_trisync_replication(
self._get_current_array(), volume
)
# If we are moving to a volume type with QoS settings then
# make sure the volume gets the correct new QoS settings.
@ -2242,6 +2417,9 @@ class PureBaseVolumeDriver(san.SanDriver):
pgroup_name_on_target = self._get_pgroup_name_on_target(
primary.array_name, pg_name)
if self._is_trisync_enabled:
pgroup_name_on_target = pg_name.replace("::", ":")
for target_array in secondaries:
self._wait_until_target_group_setting_propagates(
target_array,

View File

@ -273,11 +273,17 @@ connections should be made to both upon attaching.
Note that more than one ``replication_device`` line can be added to allow for
multi-target device replication.
To enable 3-site replication, i.e. a volume that is synchronously replicated to
one array and also asynchronously replicated to another, you must supply
two, and only two, ``replication_device`` lines: one with ``type`` set to
``sync`` and one with ``type`` set to ``async``. Additionally, the parameter
``pure_trisync_enabled`` must be set to ``True``.
A volume is only replicated if the volume is of a volume-type that has
the extra spec ``replication_enabled`` set to ``<is> True``. You can optionally
specify the ``replication_type`` key to specify ``<in> sync`` or ``<in> async``
to choose the type of replication for that volume. If not specified it will
default to ``async``.
or ``<in> trisync`` to choose the type of replication for that volume. If not
specified it will default to ``async``.
To create a volume type that specifies replication to remote back ends with
async replication:

View File

@ -0,0 +1,6 @@
---
features:
- |
Pure Storage driver: Added support for 3-site replication, aka trisync. Requires two
replication devices to be created, one async and one sync, plus the addition of new
parameters ``pure_trisync_enabled`` and ``pure_trisync_pg_name``.