Add support for volume migration in VxFlex OS driver

Add support for storage-assisted volume migration in VxFlex OS driver.
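The driver-assisted path follows Cinder's standard migrate_volume() contract. A rough standalone sketch of that contract is shown below; helper names and host strings are illustrative, not the driver code itself:

    # Cinder expects (True, model_update) when the backend migrated the
    # volume itself, or (False, None) to fall back to host-assisted
    # (copy-based) migration.
    def _backend(host):
        # "host@backend#pool" -> "host@backend"
        return host.split("#")[0]

    def migrate_volume_sketch(volume_host, destination_host):
        if _backend(volume_host) != _backend(destination_host):
            return False, None  # cross-backend: let Cinder copy the data
        return True, {}         # same backend: storage-assisted migration

    # migrate_volume_sketch("c@vxflexos#PD1:SP1", "c@vxflexos#PD1:SP2")
    # -> (True, {}); a cross-backend destination yields (False, None).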

Implements: blueprint vxflexos-migration-support
Change-Id: Ia686afcc050b805b3479314964d341ca243dbfae
Ivan Pchelintsev 2020-03-06 16:27:37 +03:00
parent 4e320543cf
commit 521a49f04c
8 changed files with 532 additions and 10 deletions


@@ -92,7 +92,7 @@ class TestCreateVolume(vxflexos.TestVxFlexOSDriver):
self.assertRaises(exception.VolumeBackendAPIException,
self.test_create_volume)
@ddt.data({'provisioning:type': 'thin'}, {'provisioning:type': 'thin'})
@ddt.data({'provisioning:type': 'thin'}, {'provisioning:type': 'thick'})
def test_create_thin_thick_volume(self, extraspecs):
self.driver._get_volumetype_extraspecs = mock.MagicMock()
self.driver._get_volumetype_extraspecs.return_value = extraspecs


@@ -0,0 +1,221 @@
# Copyright (c) 2020 Dell Inc. or its subsidiaries.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from unittest import mock
import ddt
from oslo_service import loopingcall
import six
from cinder import context
from cinder import exception
from cinder.tests.unit import fake_constants as fake
from cinder.tests.unit import fake_volume
from cinder.tests.unit.volume.drivers.dell_emc import vxflexos
MIGRATE_VOLUME_PARAMS_CASES = (
# Cases for testing the _get_volume_migration_params function.
# +-----------+-----------------+------------+-----------+------------------+
# |Volume type|Real provisioning|Conversion  |Compression|Pool supports     |
# |           |                 |            |           |thick volumes     |
# +-----------+-----------------+------------+-----------+------------------+
('thin', 'ThinProvisioned', 'NoConversion', 'None', False),
('thin', 'ThickProvisioned', 'ThickToThin', 'None', True),
('thick', 'ThinProvisioned', 'NoConversion', 'None', False),
('thick', 'ThinProvisioned', 'ThinToThick', 'None', True),
('compressed', 'ThinProvisioned', 'NoConversion', 'Normal', False),
('compressed', 'ThickProvisioned', 'ThickToThin', 'Normal', False),
('compressed', 'ThickProvisioned', 'ThickToThin', 'None', False)
)
@ddt.ddt
class TestMigrateVolume(vxflexos.TestVxFlexOSDriver):
"""Test cases for ``VxFlexOSDriver.migrate_volume()``"""
def setUp(self):
"""Setup a test case environment.
Creates a fake volume object and sets up the required API responses.
"""
super(TestMigrateVolume, self).setUp()
ctx = context.RequestContext('fake', 'fake', auth_token=True)
host = 'host@backend#{}:{}'.format(
self.PROT_DOMAIN_NAME,
self.STORAGE_POOL_NAME)
self.volume = fake_volume.fake_volume_obj(
ctx, **{'provider_id': fake.PROVIDER_ID, 'host': host,
'volume_type_id': fake.VOLUME_TYPE_ID})
self.dst_host = {'host': host}
self.DST_STORAGE_POOL_NAME = 'SP2'
self.DST_STORAGE_POOL_ID = six.text_type('2')
self.fake_vtree_id = 'c075744900000001'
self.migration_success = (True, {})
self.migration_host_assisted = (False, None)
self.HTTPS_MOCK_RESPONSES = {
self.RESPONSE_MODE.Valid: {
'types/Domain/instances/getByName::{}'.format(
self.PROT_DOMAIN_NAME
): '"{}"'.format(self.PROT_DOMAIN_ID),
'types/Pool/instances/getByName::{},{}'.format(
self.PROT_DOMAIN_ID,
self.STORAGE_POOL_NAME
): '"{}"'.format(self.STORAGE_POOL_ID),
'types/Pool/instances/getByName::{},{}'.format(
self.PROT_DOMAIN_ID,
self.DST_STORAGE_POOL_NAME
): '"{}"'.format(self.DST_STORAGE_POOL_ID),
'instances/ProtectionDomain::{}'.format(
self.PROT_DOMAIN_ID
): {'id': self.PROT_DOMAIN_ID},
'instances/StoragePool::{}'.format(
self.STORAGE_POOL_ID
): {'id': self.STORAGE_POOL_ID,
'zeroPaddingEnabled': True},
'instances/StoragePool::{}'.format(
self.DST_STORAGE_POOL_ID
): {'id': self.DST_STORAGE_POOL_ID,
'zeroPaddingEnabled': True},
'instances/Volume::{}'.format(
self.volume.provider_id
): {'volumeType': 'ThinProvisioned',
'vtreeId': self.fake_vtree_id},
'instances/Volume::{}/action/migrateVTree'.format(
self.volume.provider_id
): {},
'instances/VTree::{}'.format(
self.fake_vtree_id
): {'vtreeMigrationInfo': {
'migrationStatus': 'NotInMigration',
'migrationPauseReason': None}}
},
self.RESPONSE_MODE.Invalid: {
'instances/Volume::{}'.format(
self.volume.provider_id
): {'vtreeId': self.fake_vtree_id},
'instances/VTree::{}'.format(
self.fake_vtree_id
): {'vtreeMigrationInfo': {'migrationPauseReason': None}}
},
self.RESPONSE_MODE.BadStatus: {
'instances/Volume::{}/action/migrateVTree'.format(
self.volume.provider_id
): self.BAD_STATUS_RESPONSE
},
}
self.volumetype_extraspecs_mock = self.mock_object(
self.driver, '_get_volumetype_extraspecs',
return_value={'provisioning:type': 'thin'}
)
self.volume_is_replicated_mock = self.mock_object(
self.volume, 'is_replicated',
return_value=False
)
def test_migrate_volume(self):
ret = self.driver.migrate_volume(None, self.volume, self.dst_host)
self.assertEqual(self.migration_success, ret)
def test_migrate_replicated_volume(self):
self.volume_is_replicated_mock.return_value = True
self.assertRaises(exception.InvalidVolume,
self.driver.migrate_volume,
None, self.volume, self.dst_host)
def test_migrate_volume_crossbackend_not_supported(self):
dst_host = {'host': 'host@another_backend#PD1:P1'}
ret = self.driver.migrate_volume(None, self.volume, dst_host)
self.assertEqual(self.migration_host_assisted, ret)
def test_migrate_volume_bad_status_response(self):
with self.custom_response_mode(
**{'instances/Volume::{}/action/migrateVTree'.format(
self.volume.provider_id): self.RESPONSE_MODE.BadStatus}
):
self.assertRaises(exception.VolumeBackendAPIException,
self.driver.migrate_volume,
None, self.volume, self.dst_host)
def test_migrate_volume_migration_in_progress(self):
with self.custom_response_mode(
**{'instances/Volume::{}/action/migrateVTree'.format(
self.volume.provider_id): vxflexos.mocks.MockHTTPSResponse(
{
'errorCode': 717,
'message': 'Migration in progress',
}, 500)}
):
ret = self.driver.migrate_volume(None, self.volume, self.dst_host)
self.assertEqual(self.migration_success, ret)
@mock.patch(
'cinder.volume.drivers.dell_emc.vxflexos.driver.VxFlexOSDriver.'
'_wait_for_volume_migration_to_complete',
side_effect=loopingcall.LoopingCallTimeOut()
)
def test_migrate_volume_migration_in_progress_timeout_expired(self, m):
_, upd = self.driver.migrate_volume(None, self.volume, self.dst_host)
self.assertEqual('maintenance', upd['status'])
def test_migrate_volume_migration_failed(self):
with self.custom_response_mode(
**{'instances/VTree::{}'.format(self.fake_vtree_id):
vxflexos.mocks.MockHTTPSResponse(
{'vtreeMigrationInfo':
{'migrationStatus': 'NotInMigration',
'migrationPauseReason': 'MigrationError'}}, 200)}
):
self.assertRaises(exception.VolumeMigrationFailed,
self.driver.migrate_volume,
None, self.volume, self.dst_host)
def test_get_real_provisioning_and_vtree_malformed_response(self):
self.set_https_response_mode(self.RESPONSE_MODE.Invalid)
self.assertRaises(exception.MalformedResponse,
self.driver._get_real_provisioning_and_vtree,
self.volume.provider_id)
def test_wait_for_volume_migration_to_complete_malformed_response(self):
self.set_https_response_mode(self.RESPONSE_MODE.Invalid)
self.assertRaises(exception.MalformedResponse,
self.driver._wait_for_volume_migration_to_complete,
self.fake_vtree_id, self.volume.provider_id)
@ddt.data(*MIGRATE_VOLUME_PARAMS_CASES)
def test_get_migrate_volume_params(self, data):
(vol_type,
real_prov,
conversion,
compression,
sup_thick) = data
self.mock_object(self.driver, '_get_provisioning_and_compression',
return_value=(vol_type, compression))
self.mock_object(self.driver, '_check_pool_support_thick_vols',
return_value=sup_thick)
domain_name, pool_name = (
self.driver._extract_domain_and_pool_from_host(
self.dst_host['host']
)
)
ret = self.driver._get_volume_migration_params(self.volume,
domain_name,
pool_name,
real_prov)
self.assertEqual(conversion, ret['volTypeConversion'])
self.assertEqual(compression, ret['compressionMethod'])


@@ -23,6 +23,7 @@ from os_brick import initiator
from oslo_config import cfg
from oslo_log import log as logging
from oslo_log import versionutils
from oslo_service import loopingcall
from oslo_utils import excutils
from oslo_utils import units
import six
@@ -87,9 +88,10 @@ class VxFlexOSDriver(driver.VolumeDriver):
3.0.0 - Add support for VxFlex OS 3.0.x and for volumes compression
3.5.0 - Add support for VxFlex OS 3.5.x
3.5.1 - Add volume replication v2.1 support for VxFlex OS 3.5.x
3.5.2 - Add volume migration support
"""
VERSION = "3.5.1"
VERSION = "3.5.2"
# ThirdPartySystems wiki
CI_WIKI_NAME = "DellEMC_VxFlexOS_CI"
@@ -130,6 +132,13 @@ class VxFlexOSDriver(driver.VolumeDriver):
def get_driver_options():
return vxflexos_opts
@staticmethod
def _extract_domain_and_pool_from_host(host):
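"""Return (protection_domain_name, storage_pool_name) from a host string.

For example, a host string such as "host@vxflexos#PD1:SP1" yields
("PD1", "SP1").
"""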
pd_sp = volume_utils.extract_host(host, "pool")
protection_domain_name = pd_sp.split(":")[0]
storage_pool_name = pd_sp.split(":")[1]
return protection_domain_name, storage_pool_name
@property
def _available_failover_choices(self):
"""Available choices to failover/failback host."""
@@ -354,9 +363,9 @@ class VxFlexOSDriver(driver.VolumeDriver):
LOG.info("Configure replication for %(entity_type)s %(id)s. ",
{"entity_type": entity_type, "id": vol_or_snap.id})
try:
pd_sp = volume_utils.extract_host(entity.host, "pool")
protection_domain_name = pd_sp.split(":")[0]
storage_pool_name = pd_sp.split(":")[1]
protection_domain_name, storage_pool_name = (
self._extract_domain_and_pool_from_host(entity.host)
)
self._check_volume_creation_safe(protection_domain_name,
storage_pool_name,
secondary=True)
@@ -588,9 +597,9 @@ class VxFlexOSDriver(driver.VolumeDriver):
client = self._get_client()
self._check_volume_size(volume.size)
pd_sp = volume_utils.extract_host(volume.host, "pool")
protection_domain_name = pd_sp.split(":")[0]
storage_pool_name = pd_sp.split(":")[1]
protection_domain_name, storage_pool_name = (
self._extract_domain_and_pool_from_host(volume.host)
)
self._check_volume_creation_safe(protection_domain_name,
storage_pool_name)
storage_type = self._get_volumetype_extraspecs(volume)
@@ -1245,6 +1254,212 @@ class VxFlexOSDriver(driver.VolumeDriver):
finally:
self._sio_detach_volume(volume)
def migrate_volume(self, ctxt, volume, host):
"""Migrate VxFlex OS volume within the same backend."""
LOG.info("Migrate volume %(vol_id)s to %(host)s.",
{"vol_id": volume.id, "host": host["host"]})
client = self._get_client()
def fall_back_to_host_assisted():
LOG.debug("Falling back to host-assisted migration.")
return False, None
if volume.is_replicated():
msg = _("Migration of replicated volumes is not allowed.")
LOG.error(msg)
raise exception.InvalidVolume(reason=msg)
# Check migration between different backends
src_backend = volume_utils.extract_host(volume.host, "backend")
dst_backend = volume_utils.extract_host(host["host"], "backend")
if src_backend != dst_backend:
LOG.debug("Cross-backends migration is not supported "
"by VxFlex OS.")
return fall_back_to_host_assisted()
# Check migration is supported by storage API
if not flex_utils.version_gte(client.query_rest_api_version(), "3.0"):
LOG.debug("VxFlex OS versions less than v3.0 do not "
"support volume migration.")
return fall_back_to_host_assisted()
# Check storage pools compatibility
src_pd, src_sp = self._extract_domain_and_pool_from_host(volume.host)
dst_pd, dst_sp = self._extract_domain_and_pool_from_host(host["host"])
if not self._pools_compatible_for_migration(src_pd,
src_sp,
dst_pd,
dst_sp):
return fall_back_to_host_assisted()
real_provisioning, vtree_id = (
self._get_real_provisioning_and_vtree(volume.provider_id)
)
params = self._get_volume_migration_params(volume,
dst_pd,
dst_sp,
real_provisioning)
client.migrate_vtree(volume, params)
try:
self._wait_for_volume_migration_to_complete(vtree_id, volume.id)
except loopingcall.LoopingCallTimeOut:
# Volume migration is still in progress but timeout has expired.
# Volume status is set to maintenance to prevent performing other
# operations with volume. Migration status should be checked on the
# storage side. If the migration successfully completed, volume
# status should be manually changed to AVAILABLE.
updates = {
"status": fields.VolumeStatus.MAINTENANCE,
}
msg = (_("Migration of volume %s is still in progress "
"but timeout has expired. Volume status is set to "
"maintenance to prevent performing operations with this "
"volume. Check the migration status "
"on the storage side and set volume status manually if "
"migration succeeded.") % volume.id)
LOG.warning(msg)
return True, updates
return True, {}
def _pools_compatible_for_migration(self, src_pd, src_sp, dst_pd, dst_sp):
"""Compare storage pools properties to determine migration possibility.
Limitations:
- For migration from Medium Granularity (MG) to Fine Granularity (FG)
storage pool zero padding must be enabled on the MG pool.
- For migration from MG to MG pool zero padding must be either enabled
or disabled on both pools.
"""
client = self._get_client()
src_zero_padding_enabled = client.is_volume_creation_safe(src_pd,
src_sp)
dst_zero_padding_enabled = client.is_volume_creation_safe(dst_pd,
dst_sp)
src_is_fg_pool = self._is_fine_granularity_pool(src_pd, src_sp)
dst_is_fg_pool = self._is_fine_granularity_pool(dst_pd, dst_sp)
if src_is_fg_pool:
return True
elif dst_is_fg_pool:
if not src_zero_padding_enabled:
LOG.debug("Migration from Medium Granularity storage pool "
"with zero padding disabled to Fine Granularity one "
"is not allowed.")
return False
return True
elif src_zero_padding_enabled != dst_zero_padding_enabled:
LOG.debug("Zero padding must be either enabled or disabled on "
"both storage pools.")
return False
return True
def _get_real_provisioning_and_vtree(self, provider_id):
"""Get volume real provisioning type and vtree_id."""
response = self._get_client().query_volume(provider_id)
try:
provisioning = response["volumeType"]
vtree_id = response["vtreeId"]
return provisioning, vtree_id
except KeyError:
msg = (_("Query volume response does not contain "
"required fields: volumeType and vtreeId."))
LOG.error(msg)
raise exception.MalformedResponse(
cmd="_get_real_provisioning_and_vtree",
reason=msg
)
def _get_volume_migration_params(self,
volume,
dst_domain_name,
dst_pool_name,
real_provisioning):
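"""Build request parameters for the migrateVTree storage API call."""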
client = self._get_client()
dst_pool_id = client.get_storage_pool_id(dst_domain_name,
dst_pool_name)
params = {
"destSPId": dst_pool_id,
"volTypeConversion": "NoConversion",
"compressionMethod": "None",
"allowDuringRebuild": six.text_type(
self.configuration.vxflexos_allow_migration_during_rebuild
),
}
storage_type = self._get_volumetype_extraspecs(volume)
provisioning, compression = self._get_provisioning_and_compression(
storage_type,
dst_domain_name,
dst_pool_name
)
pool_supports_thick_vols = self._check_pool_support_thick_vols(
dst_domain_name,
dst_pool_name
)
if (
real_provisioning == "ThickProvisioned" and
(provisioning in ["thin", "compressed"] or
not pool_supports_thick_vols)
):
params["volTypeConversion"] = "ThickToThin"
elif (
real_provisioning == "ThinProvisioned" and
provisioning == "thick" and
pool_supports_thick_vols
):
params["volTypeConversion"] = "ThinToThick"
params["compressionMethod"] = compression
return params
@utils.retry(exception.VolumeBackendAPIException,
interval=5, backoff_rate=1, retries=3)
def _wait_for_volume_migration_to_complete(self, vtree_id, vol_id):
"""Check volume migration status."""
LOG.debug("Wait for migration of volume %s to complete.", vol_id)
def _inner():
response = self._get_client().query_vtree(vtree_id, vol_id)
try:
migration_status = (
response["vtreeMigrationInfo"]["migrationStatus"]
)
migration_pause_reason = (
response["vtreeMigrationInfo"]["migrationPauseReason"]
)
if (
migration_status == "NotInMigration" and
not migration_pause_reason
):
# Migration completed successfully.
raise loopingcall.LoopingCallDone()
elif migration_pause_reason:
# Migration failed or paused on the storage side.
# Volume remains on the source backend.
msg = (_("Migration of volume %(vol_id)s failed or "
"paused on the storage side. "
"Migration status: %(status)s, "
"pause reason: %(reason)s.") %
{"vol_id": vol_id,
"status": migration_status,
"reason": migration_pause_reason})
LOG.error(msg)
raise exception.VolumeMigrationFailed(msg)
except KeyError:
msg = (_("Check Migration status response does not contain "
"required fields: migrationStatus and "
"migrationPauseReason."))
LOG.error(msg)
raise exception.MalformedResponse(
cmd="_wait_for_volume_migration_to_complete",
reason=msg
)
timer = loopingcall.FixedIntervalWithTimeoutLoopingCall(_inner)
timer.start(interval=30, timeout=3600).wait()
def update_migrated_volume(self,
ctxt,
volume,


@@ -38,6 +38,9 @@ VXFLEXOS_STORAGE_POOLS = "vxflexos_storage_pools"
VXFLEXOS_SERVER_API_VERSION = "vxflexos_server_api_version"
VXFLEXOS_MAX_OVER_SUBSCRIPTION_RATIO = "vxflexos_max_over_subscription_ratio"
VXFLEXOS_ALLOW_NON_PADDED_VOLUMES = "vxflexos_allow_non_padded_volumes"
VXFLEXOS_ALLOW_MIGRATION_DURING_REBUILD = (
"vxflexos_allow_migration_during_rebuild"
)
deprecated_opts = [
cfg.PortOpt(SIO_REST_SERVER_PORT,
@@ -142,4 +145,7 @@ actual_opts = [
'not be enabled if multiple tenants will utilize '
'volumes from a shared Storage Pool.',
deprecated_name=SIO_ALLOW_NON_PADDED_VOLUMES),
cfg.BoolOpt(VXFLEXOS_ALLOW_MIGRATION_DURING_REBUILD,
default=False,
help='Allow volume migration during rebuild.'),
]


@@ -31,6 +31,8 @@ from cinder.volume.drivers.dell_emc.vxflexos import utils as flex_utils
LOG = logging.getLogger(__name__)
VOLUME_MIGRATION_IN_PROGRESS_ERROR = 717
VOLUME_MIGRATION_ALREADY_ON_DESTINATION_POOL_ERROR = 718
VOLUME_NOT_FOUND_ERROR = 79
OLD_VOLUME_NOT_FOUND_ERROR = 78
ILLEGAL_SYNTAX = 0
@@ -654,3 +656,33 @@ class RestClient(object):
"err_msg": response["message"]})
LOG.error(msg)
raise exception.VolumeBackendAPIException(data=msg)
def query_vtree(self, vtree_id, vol_id):
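"""Query a VTree instance; used to check volume migration status."""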
url = "/instances/VTree::%(vtree_id)s"
r, response = self.execute_vxflexos_get_request(url, vtree_id=vtree_id)
if r.status_code != http_client.OK:
msg = (_("Failed to check migration status of volume %s.")
% vol_id)
LOG.error(msg)
raise exception.VolumeBackendAPIException(msg)
return response
def migrate_vtree(self, volume, params):
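"""Start migration of a volume VTree to another storage pool.

Responses indicating that the migration is already in progress or that
the volume already resides in the destination pool are not treated as
errors.
"""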
url = "/instances/Volume::%(vol_id)s/action/migrateVTree"
r, response = self.execute_vxflexos_post_request(
url,
params=params,
vol_id=volume.provider_id
)
if r.status_code != http_client.OK:
error_code = response["errorCode"]
if error_code not in [
VOLUME_MIGRATION_IN_PROGRESS_ERROR,
VOLUME_MIGRATION_ALREADY_ON_DESTINATION_POOL_ERROR,
]:
msg = (_("Failed to migrate volume %s.") % volume.id)
LOG.error(msg)
raise exception.VolumeBackendAPIException(msg)
return response


@@ -67,7 +67,7 @@ Deployment prerequisites
Supported operations
~~~~~~~~~~~~~~~~~~~~
* Create, delete, clone, attach, detach, manage, and unmanage volumes
* Create, delete, clone, attach, detach, migrate, manage, and unmanage volumes
* Create, delete, manage, and unmanage volume snapshots
@@ -455,6 +455,50 @@ failback operation using ``--backend_id default``:
$ cinder failover-host cinder_host@vxflexos --backend_id default
VxFlex OS storage-assisted volume migration
-------------------------------------------
Starting from version 3.0, VxFlex OS supports storage-assisted volume
migration.
Known limitations
~~~~~~~~~~~~~~~~~
* Migration between different backends is not supported.
* For migration from a Medium Granularity (MG) to a Fine Granularity (FG)
  storage pool, zero padding must be enabled on the MG pool.
* For migration between two MG pools, zero padding must be either enabled
  or disabled on both pools.

In the above cases, host-assisted migration will be performed instead.
Migrate volume
~~~~~~~~~~~~~~
Volume migration is performed by issuing the following command:
.. code-block:: console
$ cinder migrate <volume> <host>
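For example, to move a volume to storage pool ``SP2`` within protection
domain ``PD1`` on the same backend (domain, pool, and backend names here
are illustrative):
.. code-block:: console
$ cinder migrate <volume> cinder_host@vxflexos#PD1:SP2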
.. note:: Volume migration has a timeout of 3600 seconds (1 hour).
This prevents waiting endlessly for a migration to complete if
something unexpected happens. If the volume is still migrating after
the timeout expires, its status is changed to ``maintenance`` to
prevent further operations on this volume, and a corresponding
warning is logged.
In this situation, check the migration status on the storage side.
If the migration succeeded, the volume status can be changed back
manually:
.. code-block:: console
$ cinder reset-state --state available <volume>
Using VxFlex OS Storage with a containerized overcloud
------------------------------------------------------


@@ -618,7 +618,7 @@ driver.dell_emc_unity=complete
driver.dell_emc_vmax_af=complete
driver.dell_emc_vmax_3=complete
driver.dell_emc_vnx=complete
driver.dell_emc_vxflexos=missing
driver.dell_emc_vxflexos=complete
driver.dell_emc_xtremio=missing
driver.fujitsu_eternus=missing
driver.hpe_3par=missing


@@ -0,0 +1,4 @@
---
features:
- |
VxFlex OS driver now supports storage-assisted volume migration.