From 4e320543cf6a115fdaa807ee9df3a8adbbd9e920 Mon Sep 17 00:00:00 2001 From: Ivan Pchelintsev Date: Wed, 12 Feb 2020 16:43:08 +0300 Subject: [PATCH] Add OpenStack volume replication v2.1 support in VxFlex OS driver Volume replication support will be added in next VxFlex OS (v3.5.0) release. Cinder driver for VxFlex OS supports volumes/snapshots with replication enabled according to OpenStack volume replication specification. Implements: blueprint vxflexos-replication-support Change-Id: I3f2ec1ddf1867261253190953f7a65fff22c7404 --- .../drivers/dell_emc/vxflexos/__init__.py | 3 + .../dell_emc/vxflexos/test_replication.py | 112 +++++ .../drivers/dell_emc/vxflexos/driver.py | 422 ++++++++++++++++-- .../drivers/dell_emc/vxflexos/rest_client.py | 182 +++++++- .../drivers/dell-emc-vxflex-driver.rst | 119 ++++- doc/source/reference/support-matrix.ini | 2 +- ...-replication-support-f43e62df35e16e3a.yaml | 5 + 7 files changed, 795 insertions(+), 50 deletions(-) create mode 100644 cinder/tests/unit/volume/drivers/dell_emc/vxflexos/test_replication.py create mode 100644 releasenotes/notes/vxflexos-replication-support-f43e62df35e16e3a.yaml diff --git a/cinder/tests/unit/volume/drivers/dell_emc/vxflexos/__init__.py b/cinder/tests/unit/volume/drivers/dell_emc/vxflexos/__init__.py index 83e8ac54bca..f19096acce1 100644 --- a/cinder/tests/unit/volume/drivers/dell_emc/vxflexos/__init__.py +++ b/cinder/tests/unit/volume/drivers/dell_emc/vxflexos/__init__.py @@ -127,12 +127,15 @@ class TestVxFlexOSDriver(test.TestCase): self._set_overrides() self.driver = mocks.VxFlexOSDriver(configuration=self.configuration) self.driver.primary_client = mocks.VxFlexOSClient(self.configuration) + self.driver.secondary_client = mocks.VxFlexOSClient(self.configuration, + is_primary=False) self.driver.do_setup({}) self.mock_object(requests, 'get', self.do_request) self.mock_object(requests, 'post', self.do_request) self.driver.primary_client.do_setup() + self.driver.secondary_client.do_setup() def 
_set_overrides(self): # Override the defaults to fake values diff --git a/cinder/tests/unit/volume/drivers/dell_emc/vxflexos/test_replication.py b/cinder/tests/unit/volume/drivers/dell_emc/vxflexos/test_replication.py new file mode 100644 index 00000000000..75af3c2b4fc --- /dev/null +++ b/cinder/tests/unit/volume/drivers/dell_emc/vxflexos/test_replication.py @@ -0,0 +1,112 @@ +# Copyright (c) 2020 Dell Inc. or its subsidiaries. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import ddt + +from cinder import exception +from cinder.tests.unit.volume.drivers.dell_emc import vxflexos +from cinder.volume import configuration + + +@ddt.ddt +class TestReplication(vxflexos.TestVxFlexOSDriver): + """Test cases for VxFlex OS replication support.""" + + def setUp(self): + super(TestReplication, self).setUp() + + self.replication_backend_id = 'vxflex_repl' + replication_device = [ + { + 'backend_id': self.replication_backend_id, + 'san_ip': '127.0.0.2', + 'san_login': 'test', + 'san_password': 'pass' + } + ] + self.override_config('replication_device', + override=replication_device, + group=configuration.SHARED_CONF_GROUP) + + self.HTTPS_MOCK_RESPONSES = { + self.RESPONSE_MODE.Valid: { + 'types/Domain/instances/getByName::' + self.PROT_DOMAIN_NAME: + '"{}"'.format(self.PROT_DOMAIN_ID), + 'types/Pool/instances/getByName::{},{}'.format( + self.PROT_DOMAIN_ID, self.STORAGE_POOL_NAME): + '"{}"'.format(self.STORAGE_POOL_ID), + 'instances/ProtectionDomain::{}'.format(self.PROT_DOMAIN_ID): + {'id': self.PROT_DOMAIN_ID}, + 'instances/StoragePool::{}'.format(self.STORAGE_POOL_ID): + {'id': self.STORAGE_POOL_ID, 'zeroPaddingEnabled': True}, + }, + } + + def test_do_setup_replication_configured(self): + super(vxflexos.mocks.VxFlexOSDriver, self.driver).do_setup({}) + self.driver.check_for_setup_error() + self.assertTrue(self.driver.secondary_client.is_configured) + self.assertTrue(self.driver.replication_enabled) + + @ddt.data( + [ + { + 'backend_id': 'vxflex_repl1' + }, + { + 'backend_id': 'vxflex_repl2' + } + ], + [ + { + 'backend_id': 'vxflex_repl1', + 'san_ip': '127.0.0.2' + }, + ] + ) + def test_do_setup_replication_bad_configuration(self, replication_device): + self.override_config('replication_device', + override=replication_device, + group=configuration.SHARED_CONF_GROUP) + self.assertRaises(exception.InvalidInput, + super(vxflexos.mocks.VxFlexOSDriver, + self.driver).do_setup, + {}) + + def test_do_setup_already_failed_over(self): + 
self.driver.active_backend_id = 'vxflex_repl' + super(vxflexos.mocks.VxFlexOSDriver, self.driver).do_setup({}) + self.driver.check_for_setup_error() + self.assertFalse(self.driver.replication_enabled) + + def test_failover_host(self): + self.test_do_setup_replication_configured() + self.driver.failover_host({}, [], self.replication_backend_id) + self.assertEqual(self.replication_backend_id, + self.driver.active_backend_id) + + def test_failover_host_failback(self): + self.test_do_setup_already_failed_over() + self.driver.failover_host({}, [], 'default') + self.assertEqual('default', self.driver.active_backend_id) + + @ddt.data("not_valid_target", None) + def test_failover_host_secondary_id_invalid(self, secondary_id): + self.test_do_setup_replication_configured() + self.assertRaises(exception.InvalidReplicationTarget, + self.driver.failover_host, + context={}, + volumes=[], + secondary_id=secondary_id) diff --git a/cinder/volume/drivers/dell_emc/vxflexos/driver.py b/cinder/volume/drivers/dell_emc/vxflexos/driver.py index 87c83ecfccb..5dbf574d8e8 100644 --- a/cinder/volume/drivers/dell_emc/vxflexos/driver.py +++ b/cinder/volume/drivers/dell_emc/vxflexos/driver.py @@ -17,11 +17,13 @@ Driver for Dell EMC VxFlex OS (formerly named Dell EMC ScaleIO). 
""" import math +from operator import xor from os_brick import initiator from oslo_config import cfg from oslo_log import log as logging from oslo_log import versionutils +from oslo_utils import excutils from oslo_utils import units import six from six.moves import http_client @@ -40,6 +42,7 @@ from cinder.volume.drivers.dell_emc.vxflexos import options from cinder.volume.drivers.dell_emc.vxflexos import rest_client from cinder.volume.drivers.dell_emc.vxflexos import utils as flex_utils from cinder.volume.drivers.san import san +from cinder.volume import manager from cinder.volume import qos_specs from cinder.volume import volume_types from cinder.volume import volume_utils @@ -54,6 +57,7 @@ LOG = logging.getLogger(__name__) PROVISIONING_KEY = "provisioning:type" +REPLICATION_CG_KEY = "vxflexos:replication_cg" QOS_IOPS_LIMIT_KEY = "maxIOPS" QOS_BANDWIDTH_LIMIT = "maxBWS" QOS_IOPS_PER_GB = "maxIOPSperGB" @@ -82,9 +86,10 @@ class VxFlexOSDriver(driver.VolumeDriver): 2.0.5 - Change driver name, rename config file options 3.0.0 - Add support for VxFlex OS 3.0.x and for volumes compression 3.5.0 - Add support for VxFlex OS 3.5.x + 3.5.1 - Add volume replication v2.1 support for VxFlex OS 3.5.x """ - VERSION = "3.5.0" + VERSION = "3.5.1" # ThirdPartySystems wiki CI_WIKI_NAME = "DellEMC_VxFlexOS_CI" @@ -96,27 +101,68 @@ class VxFlexOSDriver(driver.VolumeDriver): def __init__(self, *args, **kwargs): super(VxFlexOSDriver, self).__init__(*args, **kwargs) + self.active_backend_id = kwargs.get("active_backend_id") self.configuration.append_config_values(san.san_opts) self.configuration.append_config_values(vxflexos_opts) self.statisticProperties = None self.storage_pools = None self.provisioning_type = None self.connector = None + self.replication_enabled = None + self.replication_device = None + self.failover_choices = None self.primary_client = None + self.secondary_client = None + + def _init_vendor_properties(self): + properties = {} + self._set_property( + properties, + 
"vxflexos:replication_cg", + "VxFlex OS Replication Consistency Group.", + _("Specifies the VxFlex OS Replication Consistency group for a " + "volume type. Source and target volumes will be added to the " + "specified RCG during creation."), + "string") + return properties, "vxflexos" @staticmethod def get_driver_options(): return vxflexos_opts - def _get_client(self): + @property + def _available_failover_choices(self): + """Available choices to failover/failback host.""" + + return self.failover_choices.difference({self.active_backend_id}) + + @property + def _is_failed_over(self): + """Check if storage backend is in FAILED_OVER state. + + :return: storage backend failover state + """ + + return bool(self.active_backend_id and + self.active_backend_id != "default") + + def _get_client(self, secondary=False): """Get appropriate REST client for storage backend. + :param secondary: primary or secondary client :return: REST client for storage backend """ - return self.primary_client + if xor(self._is_failed_over, secondary): + return self.secondary_client + else: + return self.primary_client def do_setup(self, context): + if not self.active_backend_id: + self.active_backend_id = manager.VolumeManager.FAILBACK_SENTINEL + if not self.failover_choices: + self.failover_choices = {manager.VolumeManager.FAILBACK_SENTINEL} vxflexos_storage_pools = ( self.configuration.safe_get("vxflexos_storage_pools") ) @@ -138,7 +184,10 @@ class VxFlexOSDriver(driver.VolumeDriver): self.configuration.num_volume_device_scan_tries ) self.primary_client = rest_client.RestClient(self.configuration) + self.secondary_client = rest_client.RestClient(self.configuration, + is_primary=False) self.primary_client.do_setup() + self.secondary_client.do_setup() def check_for_setup_error(self): client = self._get_client() @@ -184,6 +233,40 @@ class VxFlexOSDriver(driver.VolumeDriver): "Consult the VxFlex OS product documentation " "for information on how to enable zero padding " "and prevent this from 
occurring.", pool) + # validate replication configuration + if self.secondary_client.is_configured: + self.replication_device = self.configuration.replication_device[0] + self.failover_choices.add(self.replication_device["backend_id"]) + if self._is_failed_over: + LOG.warning("Storage backend is in FAILED_OVER state. " + "Replication is DISABLED.") + self.replication_enabled = False + else: + primary_version = self.primary_client.query_rest_api_version() + secondary_version = ( + self.secondary_client.query_rest_api_version() + ) + if not (flex_utils.version_gte(primary_version, "3.5") and + flex_utils.version_gte(secondary_version, "3.5")): + LOG.info("VxFlex OS versions less than v3.5 do not " + "support replication.") + self.replication_enabled = False + else: + self.replication_enabled = True + else: + self.replication_enabled = False + + @property + def replication_targets(self): + """Replication targets for storage backend. + + :return: replication targets + """ + + if self.replication_enabled and not self._is_failed_over: + return [self.replication_device] + else: + return [] def _get_queryable_statistics(self, sio_type, sio_id): """Get statistic properties that can be obtained from VxFlex OS. @@ -251,15 +334,224 @@ class VxFlexOSDriver(driver.VolumeDriver): ) return self.statisticProperties + def _setup_volume_replication(self, vol_or_snap, source_provider_id): + """Configure replication for volume or snapshot. + + Create volume on secondary VxFlex OS storage backend. + Pair volumes and add replication pair to replication consistency group. + + :param vol_or_snap: source volume/snapshot + :param source_provider_id: primary VxFlex OS volume id + """ + try: + # If vol_or_snap has 'volume' attribute we are dealing + # with snapshot. Necessary parameters is stored in volume object. 
+ entity = vol_or_snap.volume + entity_type = "snapshot" + except AttributeError: + entity = vol_or_snap + entity_type = "volume" + LOG.info("Configure replication for %(entity_type)s %(id)s. ", + {"entity_type": entity_type, "id": vol_or_snap.id}) + try: + pd_sp = volume_utils.extract_host(entity.host, "pool") + protection_domain_name = pd_sp.split(":")[0] + storage_pool_name = pd_sp.split(":")[1] + self._check_volume_creation_safe(protection_domain_name, + storage_pool_name, + secondary=True) + storage_type = self._get_volumetype_extraspecs(entity) + rcg_name = storage_type.get(REPLICATION_CG_KEY) + LOG.info("Replication Consistency Group name: %s.", rcg_name) + provisioning, compression = self._get_provisioning_and_compression( + storage_type, + protection_domain_name, + storage_pool_name, + secondary=True + ) + dest_provider_id = self._get_client(secondary=True).create_volume( + protection_domain_name, + storage_pool_name, + vol_or_snap.id, + entity.size, + provisioning, + compression) + self._get_client().create_volumes_pair(rcg_name, + source_provider_id, + dest_provider_id) + LOG.info("Successfully configured replication for %(entity_type)s " + "%(id)s.", + {"entity_type": entity_type, "id": vol_or_snap.id}) + except exception.VolumeBackendAPIException: + with excutils.save_and_reraise_exception(): + LOG.error("Failed to configure replication for " + "%(entity_type)s %(id)s.", + {"entity_type": entity_type, "id": vol_or_snap.id}) + + def _teardown_volume_replication(self, provider_id): + """Stop volume/snapshot replication. + + Unpair volumes/snapshot and remove volume/snapshot from VxFlex OS + secondary storage backend. 
+ """ + + if not provider_id: + LOG.warning("Volume or snapshot does not have provider_id thus " + "does not map to VxFlex OS volume.") + return + try: + pair_id, remote_pair_id, vol_id, remote_vol_id = ( + self._get_client().get_volumes_pair_attrs("localVolumeId", + provider_id) + ) + except exception.VolumeBackendAPIException: + LOG.info("Replication pair for volume %s is not found. " + "Replication for volume was not configured or was " + "modified from storage side.", provider_id) + return + self._get_client().remove_volumes_pair(pair_id) + if not self._is_failed_over: + self._get_client(secondary=True).remove_volume(remote_vol_id) + + def failover_host(self, context, volumes, secondary_id=None, groups=None): + if secondary_id not in self._available_failover_choices: + msg = (_("Target %(target)s is not valid choice. " + "Valid choices: %(choices)s.") % + {"target": secondary_id, + "choices": ', '.join(self._available_failover_choices)}) + LOG.error(msg) + raise exception.InvalidReplicationTarget(reason=msg) + is_failback = secondary_id == manager.VolumeManager.FAILBACK_SENTINEL + failed_over_rcgs = {} + model_updates = [] + for volume in volumes: + storage_type = self._get_volumetype_extraspecs(volume) + rcg_name = storage_type.get(REPLICATION_CG_KEY) + if not rcg_name: + LOG.error("Replication Consistency Group is not specified in " + "volume %s VolumeType.", volume.id) + failover_status = fields.ReplicationStatus.FAILOVER_ERROR + updates = self._generate_model_updates(volume, + failover_status, + is_failback) + model_updates.append({"volume_id": volume.id, "updates": updates}) + continue + if rcg_name in failed_over_rcgs: + failover_status = failed_over_rcgs[rcg_name] + else: + failover_status = self._failover_replication_cg( + rcg_name, is_failback + ) + failed_over_rcgs[rcg_name] = failover_status + updates = self._generate_model_updates(volume, + failover_status, + is_failback) + model_updates.append({"volume_id": volume.id, "updates": updates}) + self.active_backend_id = secondary_id + 
self.replication_enabled = is_failback + return secondary_id, model_updates, [] + + def _failover_replication_cg(self, rcg_name, is_failback): + """Failover/failback Replication Consistency Group on storage backend. + + :param rcg_name: name of VxFlex OS Replication Consistency Group + :param is_failback: is failover or failback + :return: failover status of Replication Consistency Group + """ + + action = "failback" if is_failback else "failover" + LOG.info("Perform %(action)s of Replication Consistency Group " + "%(rcg_name)s.", {"action": action, "rcg_name": rcg_name}) + try: + self._get_client(secondary=True).failover_failback_replication_cg( + rcg_name, is_failback + ) + failover_status = fields.ReplicationStatus.FAILED_OVER + LOG.info("Successfully performed %(action)s of Replication " + "Consistency Group %(rcg_name)s.", + {"action": action, "rcg_name": rcg_name}) + except exception.VolumeBackendAPIException: + LOG.error("Failed to perform %(action)s of Replication " + "Consistency Group %(rcg_name)s.", + {"action": action, "rcg_name": rcg_name}) + failover_status = fields.ReplicationStatus.FAILOVER_ERROR + return failover_status + + def _generate_model_updates(self, volume, failover_status, is_failback): + """Generate volume model updates after failover/failback. + + Get new provider_id for volume and update volume snapshots if + presented. 
+ """ + + LOG.info("Generate model updates for volume %s and its snapshots.", + volume.id) + error_status = (fields.ReplicationStatus.ERROR if is_failback else + fields.ReplicationStatus.FAILOVER_ERROR) + updates = {} + if failover_status == fields.ReplicationStatus.FAILED_OVER: + client = self._get_client(secondary=True) + try: + LOG.info("Query new provider_id for volume %s.", volume.id) + pair_id, remote_pair_id, vol_id, remote_vol_id = ( + client.get_volumes_pair_attrs("remoteVolumeId", + volume.provider_id) + ) + LOG.info("New provider_id for volume %(vol_id)s: " + "%(provider_id)s.", + {"vol_id": volume.id, "provider_id": vol_id}) + updates["provider_id"] = vol_id + except exception.VolumeBackendAPIException: + LOG.error("Failed to query new provider_id for volume " + "%(vol_id)s. Volume status will be changed to " + "%(status)s.", + {"vol_id": volume.id, "status": error_status}) + updates["replication_status"] = error_status + for snapshot in volume.snapshots: + try: + LOG.info("Query new provider_id for snapshot %(snap_id)s " + "of volume %(vol_id)s.", + {"snap_id": snapshot.id, "vol_id": volume.id}) + pair_id, remote_pair_id, snap_id, remote_snap_id = ( + client.get_volumes_pair_attrs( + "remoteVolumeId", snapshot.provider_id) + ) + LOG.info("New provider_id for snapshot %(snap_id)s " + "of volume %(vol_id)s: %(provider_id)s.", + { + "snap_id": snapshot.id, + "vol_id": volume.id, + "provider_id": snap_id, + }) + snapshot.update({"provider_id": snap_id}) + except exception.VolumeBackendAPIException: + LOG.error("Failed to query new provider_id for snapshot " + "%(snap_id)s of volume %(vol_id)s. 
" + "Snapshot status will be changed to " + "%(status)s.", + { + "vol_id": volume.id, + "snap_id": snapshot.id, + "status": fields.SnapshotStatus.ERROR, + }) + snapshot.update({"status": fields.SnapshotStatus.ERROR}) + finally: + snapshot.save() + else: + updates["replication_status"] = error_status + return updates + def _get_provisioning_and_compression(self, storage_type, protection_domain_name, - storage_pool_name): + storage_pool_name, + secondary=False): """Get volume provisioning and compression from VolumeType extraspecs. :param storage_type: extraspecs :param protection_domain_name: name of VxFlex OS Protection Domain :param storage_pool_name: name of VxFlex OS Storage Pool + :param secondary: primary or secondary client :return: volume provisioning and compression """ @@ -275,11 +567,13 @@ class VxFlexOSDriver(driver.VolumeDriver): provisioning = "ThinProvisioned" if (provisioning_type == "thick" and self._check_pool_support_thick_vols(protection_domain_name, - storage_pool_name)): + storage_pool_name, + secondary)): provisioning = "ThickProvisioned" compression = "None" if self._check_pool_support_compression(protection_domain_name, - storage_pool_name): + storage_pool_name, + secondary): if provisioning_type == "compressed": compression = "Normal" return provisioning, compression @@ -297,20 +591,8 @@ class VxFlexOSDriver(driver.VolumeDriver): pd_sp = volume_utils.extract_host(volume.host, "pool") protection_domain_name = pd_sp.split(":")[0] storage_pool_name = pd_sp.split(":")[1] - allowed = client.is_volume_creation_safe(protection_domain_name, - storage_pool_name) - if not allowed: - # Do not allow volume creation on this backend. - # Volumes may leak data between tenants. - LOG.error("Volume creation rejected due to " - "zero padding being disabled for pool, %s:%s. 
" - "This behaviour can be changed by setting " - "the configuration option " - "vxflexos_allow_non_padded_volumes = True.", - protection_domain_name, storage_pool_name) - msg = _("Volume creation rejected due to " - "unsafe backend configuration.") - raise exception.VolumeBackendAPIException(data=msg) + self._check_volume_creation_safe(protection_domain_name, + storage_pool_name) storage_type = self._get_volumetype_extraspecs(volume) LOG.info("Create volume %(vol_id)s. Volume type: %(volume_type)s, " "Storage Pool name: %(pool_name)s, Protection Domain name: " @@ -326,14 +608,17 @@ class VxFlexOSDriver(driver.VolumeDriver): protection_domain_name, storage_pool_name ) - source_provider_id = client.create_volume(protection_domain_name, - storage_pool_name, - volume, provisioning, - compression) + provider_id = client.create_volume(protection_domain_name, + storage_pool_name, + volume.id, + volume.size, + provisioning, + compression) real_size = int(flex_utils.round_to_num_gran(volume.size)) model_updates = { - "provider_id": source_provider_id, + "provider_id": provider_id, "size": real_size, + "replication_status": fields.ReplicationStatus.DISABLED, } LOG.info("Successfully created volume %(vol_id)s. " "Volume size: %(size)s. 
VxFlex OS volume name: %(vol_name)s, " @@ -342,8 +627,13 @@ class VxFlexOSDriver(driver.VolumeDriver): "vol_id": volume.id, "size": real_size, "vol_name": flex_utils.id_to_base64(volume.id), - "provider_id": source_provider_id, + "provider_id": provider_id, }) + if volume.is_replicated(): + self._setup_volume_replication(volume, provider_id) + model_updates["replication_status"] = ( + fields.ReplicationStatus.ENABLED + ) return model_updates def _check_volume_size(self, size): @@ -362,6 +652,27 @@ class VxFlexOSDriver(driver.VolumeDriver): LOG.error(msg) raise exception.VolumeBackendAPIException(data=msg) + def _check_volume_creation_safe(self, + protection_domain_name, + storage_pool_name, + secondary=False): + allowed = self._get_client(secondary).is_volume_creation_safe( + protection_domain_name, + storage_pool_name + ) + if not allowed: + # Do not allow volume creation on this backend. + # Volumes may leak data between tenants. + LOG.error("Volume creation rejected due to " + "zero padding being disabled for pool, %s:%s. " + "This behaviour can be changed by setting " + "the configuration option " + "vxflexos_allow_non_padded_volumes = True.", + protection_domain_name, storage_pool_name) + msg = _("Volume creation rejected due to " + "unsafe backend configuration.") + raise exception.VolumeBackendAPIException(data=msg) + def create_snapshot(self, snapshot): """Create volume snapshot on VxFlex OS storage backend. 
@@ -388,6 +699,8 @@ class VxFlexOSDriver(driver.VolumeDriver): "snap_name": flex_utils.id_to_base64(provider_id), "snap_provider_id": provider_id, }) + if snapshot.volume.is_replicated(): + self._setup_volume_replication(snapshot, provider_id) return model_updates def _create_volume_from_source(self, volume, source): @@ -409,6 +722,7 @@ class VxFlexOSDriver(driver.VolumeDriver): provider_id = client.snapshot_volume(source.provider_id, volume.id) model_updates = { "provider_id": provider_id, + "replication_status": fields.ReplicationStatus.DISABLED, } LOG.info("Successfully created volume %(vol_id)s " "from source %(source_id)s. VxFlex OS volume name: " @@ -430,6 +744,11 @@ class VxFlexOSDriver(driver.VolumeDriver): if volume.size > source_size: real_size = flex_utils.round_to_num_gran(volume.size) client.extend_volume(provider_id, real_size) + if volume.is_replicated(): + self._setup_volume_replication(volume, provider_id) + model_updates["replication_status"] = ( + fields.ReplicationStatus.ENABLED + ) return model_updates def create_volume_from_snapshot(self, volume, snapshot): @@ -460,6 +779,13 @@ class VxFlexOSDriver(driver.VolumeDriver): volume_real_old_size = flex_utils.round_to_num_gran(volume.size) if volume_real_old_size == volume_new_size: return + if volume.is_replicated(): + pair_id, remote_pair_id, vol_id, remote_vol_id = ( + self._get_client().get_volumes_pair_attrs("localVolumeId", + volume.provider_id) + ) + self._get_client(secondary=True).extend_volume(remote_vol_id, + volume_new_size) self._get_client().extend_volume(volume.provider_id, volume_new_size) def create_cloned_volume(self, volume, src_vref): @@ -477,10 +803,14 @@ class VxFlexOSDriver(driver.VolumeDriver): def delete_volume(self, volume): """Delete volume from VxFlex OS storage backend. + If volume is replicated, replication will be stopped first. 
+ :param volume: volume to be deleted """ LOG.info("Delete volume %s.", volume.id) + if volume.is_replicated(): + self._teardown_volume_replication(volume.provider_id) self._get_client().remove_volume(volume.provider_id) def delete_snapshot(self, snapshot): @@ -490,6 +820,8 @@ class VxFlexOSDriver(driver.VolumeDriver): """ LOG.info("Delete snapshot %s.", snapshot.id) + if snapshot.volume.is_replicated(): + self._teardown_volume_replication(snapshot.provider_id) self._get_client().remove_volume(snapshot.provider_id) def initialize_connection(self, volume, connector, **kwargs): @@ -615,6 +947,10 @@ class VxFlexOSDriver(driver.VolumeDriver): stats["thick_provisioning_support"] = True stats["thin_provisioning_support"] = True stats["multiattach"] = True + stats["replication_enabled"] = ( + self.replication_enabled and not self._is_failed_over + ) + stats["replication_targets"] = self.replication_targets pools = [] backend_free_capacity = 0 @@ -649,6 +985,8 @@ class VxFlexOSDriver(driver.VolumeDriver): "reserved_percentage": 0, "thin_provisioning_support": pool_support_thin_vols, "thick_provisioning_support": pool_support_thick_vols, + "replication_enabled": stats["replication_enabled"], + "replication_targets": stats["replication_targets"], "multiattach": True, "provisioned_capacity_gb": provisioned_capacity, "max_over_subscription_ratio": @@ -761,24 +1099,40 @@ class VxFlexOSDriver(driver.VolumeDriver): ) return total_capacity_gb, free_capacity_gb, provisioned_capacity_gb - def _check_pool_support_thick_vols(self, domain_name, pool_name): + def _check_pool_support_thick_vols(self, + domain_name, + pool_name, + secondary=False): # storage pools with fine granularity doesn't support # thick volumes - return not self._is_fine_granularity_pool(domain_name, pool_name) + return not self._is_fine_granularity_pool(domain_name, + pool_name, + secondary) - def _check_pool_support_thin_vols(self, domain_name, pool_name): + def _check_pool_support_thin_vols(self, + domain_name, + 
pool_name, + secondary=False): # thin volumes available since VxFlex OS 2.x - client = self._get_client() + client = self._get_client(secondary) return flex_utils.version_gte(client.query_rest_api_version(), "2.0") - def _check_pool_support_compression(self, domain_name, pool_name): + def _check_pool_support_compression(self, + domain_name, + pool_name, + secondary=False): # volume compression available only in storage pools # with fine granularity - return self._is_fine_granularity_pool(domain_name, pool_name) + return self._is_fine_granularity_pool(domain_name, + pool_name, + secondary) - def _is_fine_granularity_pool(self, domain_name, pool_name): - client = self._get_client() + def _is_fine_granularity_pool(self, + domain_name, + pool_name, + secondary=False): + client = self._get_client(secondary) if flex_utils.version_gte(client.query_rest_api_version(), "3.0"): r = client.get_storage_pool_properties(domain_name, pool_name) diff --git a/cinder/volume/drivers/dell_emc/vxflexos/rest_client.py b/cinder/volume/drivers/dell_emc/vxflexos/rest_client.py index 9e04731ba31..b0e0b74198a 100644 --- a/cinder/volume/drivers/dell_emc/vxflexos/rest_client.py +++ b/cinder/volume/drivers/dell_emc/vxflexos/rest_client.py @@ -37,11 +37,14 @@ ILLEGAL_SYNTAX = 0 class RestClient(object): - def __init__(self, configuration): + def __init__(self, configuration, is_primary=True): self.configuration = configuration + self.is_primary = is_primary self.spCache = simplecache.SimpleCache("Storage Pool", age_minutes=5) self.pdCache = simplecache.SimpleCache("Protection Domain", age_minutes=5) + self.rcgCache = simplecache.SimpleCache("Replication CG", + age_minutes=5) self.rest_ip = None self.rest_port = None self.rest_username = None @@ -72,18 +75,35 @@ class RestClient(object): } def do_setup(self): - self.rest_port = self.configuration.vxflexos_rest_server_port - self.verify_certificate = ( - self.configuration.safe_get("sio_verify_server_certificate") or - 
self.configuration.safe_get("driver_ssl_cert_verify") + if self.is_primary: + get_config_value = self.configuration.safe_get + else: + replication_targets = self.configuration.safe_get( + "replication_device" + ) + if not replication_targets: + return + elif len(replication_targets) > 1: + msg = _("VxFlex OS does not support more than one " + "replication backend.") + raise exception.InvalidInput(reason=msg) + get_config_value = replication_targets[0].get + self.verify_certificate = bool( + get_config_value("sio_verify_server_certificate") or + get_config_value("driver_ssl_cert_verify") ) - self.rest_ip = self.configuration.safe_get("san_ip") - self.rest_username = self.configuration.safe_get("san_login") - self.rest_password = self.configuration.safe_get("san_password") + self.rest_ip = get_config_value("san_ip") + self.rest_port = int( + get_config_value("vxflexos_rest_server_port") or + get_config_value("sio_rest_server_port") or + 443 + ) + self.rest_username = get_config_value("san_login") + self.rest_password = get_config_value("san_password") if self.verify_certificate: self.certificate_path = ( - self.configuration.safe_get("sio_server_certificate_path") or - self.configuration.safe_get("driver_ssl_cert_path") + get_config_value("sio_server_certificate_path") or + get_config_value("driver_ssl_cert_path") ) if not all([self.rest_ip, self.rest_username, self.rest_password]): msg = _("REST server IP, username and password must be specified.") @@ -146,7 +166,8 @@ class RestClient(object): def create_volume(self, protection_domain_name, storage_pool_name, - volume, + volume_id, + volume_size, provisioning, compression): url = "/types/Volume/instances" @@ -156,9 +177,9 @@ class RestClient(object): pool_id = self.get_storage_pool_id(protection_domain_name, storage_pool_name) LOG.info("Storage Pool id: %s.", pool_id) - volume_name = flex_utils.id_to_base64(volume.id) + volume_name = flex_utils.id_to_base64(volume_id) # units.Mi = 1024 ** 2 - volume_size_kb = 
volume.size * units.Mi + volume_size_kb = volume_size * units.Mi params = { "protectionDomainId": domain_id, "storagePoolId": pool_id, @@ -196,6 +217,124 @@ class RestClient(object): raise exception.VolumeBackendAPIException(data=msg) return response["volumeIdList"][0] + def _get_replication_cg_id_by_name(self, rcg_name): + url = ("/types/ReplicationConsistencyGroup/instances" + "/action/queryIdByKey") + + if not rcg_name: + msg = _("Unable to query Replication CG id with None name.") + LOG.error(msg) + raise exception.VolumeBackendAPIException(data=msg) + cached_val = self.rcgCache.get_value(rcg_name) + if cached_val is not None: + return cached_val + encoded_rcg_name = urllib.parse.quote(rcg_name, "") + params = {"name": encoded_rcg_name} + r, rcg_id = self.execute_vxflexos_post_request(url, params) + if not rcg_id: + msg = (_("Replication CG with name %s wasn't found.") % rcg_name) + LOG.error(msg) + raise exception.VolumeBackendAPIException(data=msg) + if r.status_code != http_client.OK and "errorCode" in rcg_id: + msg = (_("Failed to get Replication CG id with name " + "%(name)s: %(message)s.") % + {"name": rcg_name, "message": rcg_id["message"]}) + LOG.error(msg) + raise exception.VolumeBackendAPIException(data=msg) + LOG.info("Replication CG id: %s.", rcg_id) + self.rcgCache.update(rcg_name, rcg_id) + return rcg_id + + def _query_volumes_pair(self, + pair_id): + url = "/instances/ReplicationPair::%(pair_id)s" + + r, response = self.execute_vxflexos_get_request(url, pair_id=pair_id) + if r.status_code != http_client.OK and "errorCode" in response: + msg = (_("Failed to query volumes pair %(pair_id)s: %(err)s.") % + {"pair_id": pair_id, "err": response["message"]}) + LOG.error(msg) + raise exception.VolumeBackendAPIException(data=msg) + return response + + def _query_replication_pairs(self): + url = "/types/ReplicationPair/instances" + + r, response = self.execute_vxflexos_get_request(url) + if r.status_code != http_client.OK and "errorCode" in response: + msg = 
(_("Failed to query replication pairs: %s.") % + response["message"]) + LOG.error(msg) + raise exception.VolumeBackendAPIException(data=msg) + return response + + @staticmethod + def _filter_replication_pairs(replication_pairs, + filter_key, + filter_value): + try: + return next(filter(lambda pair: pair[filter_key] == filter_value, + replication_pairs)) + except StopIteration: + msg = (_("Volume pair for volume with id %s is not found.") + % filter_value) + LOG.error(msg) + raise exception.VolumeBackendAPIException(data=msg) + + def get_volumes_pair_attrs(self, filter_key, filter_value): + replication_pairs = self._query_replication_pairs() + founded = self._filter_replication_pairs(replication_pairs, + filter_key, + filter_value) + pair_id = founded["id"] + remote_pair_id = founded["remoteId"] + vol_provider_id = founded["localVolumeId"] + remote_vol_provider_id = founded["remoteVolumeId"] + return pair_id, remote_pair_id, vol_provider_id, remote_vol_provider_id + + def create_volumes_pair(self, + rcg_name, + source_provider_id, + dest_provider_id): + url = "/types/ReplicationPair/instances" + + rcg_id = self._get_replication_cg_id_by_name(rcg_name) + params = { + "name": source_provider_id, + "replicationConsistencyGroupId": rcg_id, + "copyType": "OnlineCopy", + "sourceVolumeId": source_provider_id, + "destinationVolumeId": dest_provider_id, + } + r, response = self.execute_vxflexos_post_request(url, params, ) + if r.status_code != http_client.OK and "errorCode" in response: + msg = (_("Failed to create volumes pair: %s.") % + response["message"]) + LOG.error(msg) + raise exception.VolumeBackendAPIException(data=msg) + replication_pair = self._query_volumes_pair(response["id"]) + LOG.info("Created volumes pair %(vol_pair_id)s. 
" + "Remote pair %(remote_pair_id)s.", + { + "vol_pair_id": replication_pair["id"], + "remote_pair_id": replication_pair["remoteId"], + }) + return replication_pair["id"], replication_pair["remoteId"] + + def remove_volumes_pair(self, vol_pair_id): + url = ("/instances/ReplicationPair::%(vol_pair_id)s/action" + "/removeReplicationPair") + + r, response = self.execute_vxflexos_post_request( + url, vol_pair_id=vol_pair_id + ) + if r.status_code != http_client.OK: + msg = (_("Failed to delete volumes pair " + "%(vol_pair_id)s: %(err)s.") % + {"vol_pair_id": vol_pair_id, "err": response["message"]}) + LOG.error(msg) + raise exception.VolumeBackendAPIException(data=msg) + def _get_protection_domain_id_by_name(self, domain_name): url = "/types/Domain/instances/getByName::%(encoded_domain_name)s" @@ -498,3 +637,20 @@ class RestClient(object): else: LOG.info("VxFlex OS volume %(vol_id)s was renamed to " "%(new_name)s.", {"vol_id": vol_id, "new_name": new_name}) + + def failover_failback_replication_cg(self, rcg_name, is_failback): + url = ("/instances/ReplicationConsistencyGroup::%(rcg_id)s" + "/action/%(action)sReplicationConsistencyGroup") + + action = "restore" if is_failback else "failover" + rcg_id = self._get_replication_cg_id_by_name(rcg_name) + r, response = self.execute_vxflexos_post_request(url, + rcg_id=rcg_id, + action=action) + if r.status_code != http_client.OK: + msg = (_("Failed to %(action)s rcg with id %(rcg_id)s: " + "%(err_msg)s.") % {"action": action, + "rcg_id": rcg_id, + "err_msg": response["message"]}) + LOG.error(msg) + raise exception.VolumeBackendAPIException(data=msg) diff --git a/doc/source/configuration/block-storage/drivers/dell-emc-vxflex-driver.rst b/doc/source/configuration/block-storage/drivers/dell-emc-vxflex-driver.rst index 70df49c4b01..d7909b03b06 100644 --- a/doc/source/configuration/block-storage/drivers/dell-emc-vxflex-driver.rst +++ b/doc/source/configuration/block-storage/drivers/dell-emc-vxflex-driver.rst @@ -35,10 +35,15 @@ The 
Dell EMC VxFlex OS Block Storage driver has been tested against the following versions of ScaleIO and VxFlex OS and found to be compatible: * ScaleIO 2.0.x + * ScaleIO 2.5.x + * VxFlex OS 2.6.x + * VxFlex OS 3.0.x +* VxFlex OS 3.5.x + Please consult the :ref:`scaleio_docs` to determine supported operating systems for each version of VxFlex OS or ScaleIO. @@ -80,6 +85,7 @@ Supported operations * Create, list, update, and delete consistency group snapshots +* OpenStack replication v2.1 support VxFlex OS Block Storage driver configuration -------------------------------------------- @@ -94,7 +100,7 @@ The configuration file is usually located at ``/etc/cinder/cinder.conf``. For a configuration example, refer to the example -:ref:`cinder.conf <cg_configuration_example_emc>` . +:ref:`cinder.conf <cg_configuration_example_emc>`. VxFlex OS driver name ~~~~~~~~~~~~~~~~~~~~~ @@ -231,6 +237,8 @@ Volume types can be used to specify characteristics of volumes allocated via the VxFlex OS Driver. These characteristics are defined as ``Extra Specs`` within ``Volume Types``. +.. _vxflexos_pd_sp: + VxFlex OS Protection Domain and Storage Pool ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -301,6 +309,7 @@ is attached to an instance, and thus to a compute node/SDC. VxFlex OS compression support ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Starting from version 3.0, VxFlex OS supports volume compression. By default driver will create volumes without compression. In order to create a compressed volume, a volume type which enables @@ -337,8 +346,114 @@ limit volumes allocation only to data pools which supports compression. .. code-block:: console - $ openstack volume type set --property compression_support='<is> True' vxflexos_compressed + $ openstack volume type set --property compression_support='<is> True' vxflexos_compressed +VxFlex OS replication support +----------------------------- + +Starting from version 3.5, VxFlex OS supports volume replication.
+ +Prerequisites +~~~~~~~~~~~~~ + +* VxFlex OS replication components must be installed on source and destination + systems. + +* Source and destination systems must have the same configuration for + Protection Domains and their Storage Pools (i.e. names, zero padding, etc.). + +* Source and destination systems must be paired and have at least one + Replication Consistency Group created. + +See :ref:`scaleio_docs` for instructions. + +Configure replication +~~~~~~~~~~~~~~~~~~~~~ + +#. Enable replication in ``cinder.conf`` file. + + To enable replication feature for storage backend ``replication_device`` + must be set as below: + + .. code-block:: ini + + [DEFAULT] + enabled_backends = vxflexos + + [vxflexos] + volume_driver = cinder.volume.drivers.dell_emc.vxflexos.driver.VxFlexOSDriver + volume_backend_name = vxflexos + san_ip = GATEWAY_IP + vxflexos_storage_pools = Domain1:Pool1,Domain2:Pool2 + san_login = SIO_USER + san_password = SIO_PASSWD + san_thin_provision = false + replication_device = backend_id:vxflexos_repl, + san_ip: REPLICATION_SYSTEM_GATEWAY_IP, + san_login: REPLICATION_SYSTEM_SIO_USER, + san_password: REPLICATION_SYSTEM_SIO_PASSWD + + * Only one replication device is supported for storage backend. + + * The following parameters are optional for replication device: + + * REST API port - ``vxflexos_rest_server_port``. + + * SSL certificate verification - ``driver_ssl_cert_verify`` and + ``driver_ssl_cert_path``. + + For more information see :ref:`cg_configuration_options_emc`. + +#. Create volume type for volumes with replication enabled. + + .. code-block:: console + + $ openstack volume type create vxflexos_replicated + $ openstack volume type set --property replication_enabled='<is> True' vxflexos_replicated + +#. Set VxFlex OS Replication Consistency Group name for volume type. + + .. code-block:: console + + $ openstack volume type set --property vxflexos:replication_cg=<replication_cg name> \ + vxflexos_replicated + +#.
Set Protection Domain and Storage Pool if multiple Protection Domains + are specified. + + VxFlex OS Replication Consistency Group is created between source and + destination Protection Domains. If more than one Protection Domain is + specified in ``cinder.conf`` you should set ``pool_name`` property for + volume type with appropriate Protection Domain and Storage Pool. + See :ref:`vxflexos_pd_sp`. + +Failover host +~~~~~~~~~~~~~ + +In the event of a disaster, or where there is a required downtime the +administrator can issue the failover host command: + +.. code-block:: console + + $ cinder failover-host cinder_host@vxflexos --backend_id vxflexos_repl + +After issuing Cinder failover-host command Cinder will switch to configured +replication device, however to get existing instances to use this target and +new paths to volumes it is necessary to first shelve Nova instances and then +unshelve them, this will effectively restart the Nova instance and +re-establish data paths between Nova instances and the volumes. + +.. code-block:: console + + $ nova shelve <server> + $ nova unshelve [--availability-zone <availability_zone>] <server> + +If the primary system becomes available, the administrator can initiate +failback operation using ``--backend_id default``: + +..
code-block:: console + + $ cinder failover-host cinder_host@vxflexos --backend_id default Using VxFlex OS Storage with a containerized overcloud ------------------------------------------------------ diff --git a/doc/source/reference/support-matrix.ini b/doc/source/reference/support-matrix.ini index 1e6ac91956a..d1599d0e9b7 100644 --- a/doc/source/reference/support-matrix.ini +++ b/doc/source/reference/support-matrix.ini @@ -433,7 +433,7 @@ driver.dell_emc_unity=complete driver.dell_emc_vmax_af=complete driver.dell_emc_vmax_3=complete driver.dell_emc_vnx=complete -driver.dell_emc_vxflexos=missing +driver.dell_emc_vxflexos=complete driver.dell_emc_xtremio=missing driver.fujitsu_eternus=missing driver.hpe_3par=complete diff --git a/releasenotes/notes/vxflexos-replication-support-f43e62df35e16e3a.yaml b/releasenotes/notes/vxflexos-replication-support-f43e62df35e16e3a.yaml new file mode 100644 index 00000000000..4b2b516f1a3 --- /dev/null +++ b/releasenotes/notes/vxflexos-replication-support-f43e62df35e16e3a.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + VxFlex OS driver now supports OpenStack volume replication v2.1 for + VxFlex OS v3.5.0 storage backends.