PowerMax Driver - Concurrent live migrations failure

Concurrent live migrations can sometimes fail when one thread
deletes a storage group that another thread may need. Also
allowing for legacy short host name format in Live Migrations.

Change-Id: Iaacca9d6bce14995efa0347f84f918c175ac12cb
Closes-Bug: #1875478
This commit is contained in:
Helen Walsh 2020-04-27 20:47:25 +01:00
parent e82ff57b70
commit 03f5dce332
5 changed files with 115 additions and 20 deletions

View File

@ -3281,7 +3281,7 @@ class PowerMaxCommonTest(test.TestCase):
False, True, target_extra_specs['rep_mode'])
mck_move_vol.assert_called_once_with(
array, device_id, self.data.volume_details[0]['storageGroupId'][0],
group_name, extra_specs, force=True)
group_name, extra_specs, force=True, parent_sg=None)
mck_return_vol.assert_called_once_with(
array, volume, device_id, volume_name, extra_specs)
mck_is_vol.assert_called_once_with(array, device_id, group_name)

View File

@ -869,7 +869,7 @@ class PowerMaxMaskingTest(test.TestCase):
self.data.masking_view_name_f]
with mock.patch.object(
rest.PowerMaxRest, 'get_masking_views_by_initiator_group',
side_effect=[mv_list, mv_list, [], []]):
side_effect=[mv_list, []]):
self.mask._last_volume_delete_initiator_group(
self.data.array, self.data.initiatorgroup_name_i,
self.data.connector['host'])
@ -1192,3 +1192,41 @@ class PowerMaxMaskingTest(test.TestCase):
self.data.array, self.data.parent_sg_f))
self.assertEqual('host-with-dash', host_label)
self.assertEqual('portgroup-with-dashes', port_group_label)
@mock.patch.object(
rest.PowerMaxRest, 'is_child_sg_in_parent_sg', return_value=False)
@mock.patch.object(
rest.PowerMaxRest, 'add_child_sg_to_parent_sg')
@mock.patch.object(
rest.PowerMaxRest, 'get_storage_group',
side_effect=[None, tpd.PowerMaxData.sg_details[1],
tpd.PowerMaxData.sg_details[2]])
@mock.patch.object(
provision.PowerMaxProvision, 'create_storage_group')
def test_check_child_storage_group_exists_false(
self, mock_create, mock_get, mock_add, mock_check):
self.mask._check_child_storage_group_exists(
self.data.device_id, self.data.array,
self.data.storagegroup_name_i, self.data.extra_specs,
self.data.parent_sg_i)
mock_create.assert_called_once()
mock_add.assert_called_once()
@mock.patch.object(
rest.PowerMaxRest, 'is_child_sg_in_parent_sg', return_value=True)
@mock.patch.object(
rest.PowerMaxRest, 'add_child_sg_to_parent_sg')
@mock.patch.object(
rest.PowerMaxRest, 'get_storage_group',
side_effect=[tpd.PowerMaxData.sg_details[1],
tpd.PowerMaxData.sg_details[3]])
@mock.patch.object(
provision.PowerMaxProvision, 'create_storage_group')
def test_check_child_storage_group_exists_true(
self, mock_create, mock_get, mock_add, mock_check):
self.mask._check_child_storage_group_exists(
self.data.device_id, self.data.array,
self.data.storagegroup_name_i, self.data.extra_specs,
self.data.parent_sg_i)
mock_create.assert_not_called
mock_add.assert_not_called()

View File

@ -3819,6 +3819,7 @@ class PowerMaxCommon(object):
"""
is_re, rep_mode, mgmt_sg_name = False, None, None
parent_sg = None
if self.utils.is_replication_enabled(target_extra_specs):
is_re, rep_mode = True, target_extra_specs['rep_mode']
mgmt_sg_name = self.utils.get_rdf_management_group_name(
@ -3876,7 +3877,7 @@ class PowerMaxCommon(object):
# Move the volume from the source to target storage group
self.masking.move_volume_between_storage_groups(
array, device_id, source_sg_name, target_sg_name, extra_specs,
force=True)
force=True, parent_sg=parent_sg)
# Check if volume should be member of GVG
self.masking.return_volume_to_volume_group(

View File

@ -133,7 +133,8 @@ class PowerMaxMasking(object):
def _move_vol_from_default_sg(
self, serial_number, device_id, volume_name,
default_sg_name, dest_storagegroup, extra_specs):
default_sg_name, dest_storagegroup, extra_specs,
parent_sg_name=None):
"""Get the default storage group and move the volume.
:param serial_number: the array serial number
@ -142,6 +143,7 @@ class PowerMaxMasking(object):
:param default_sg_name: the name of the default sg
:param dest_storagegroup: the destination storage group
:param extra_specs: the extra specifications
:param parent_sg_name: optional parent storage group name
:returns: msg
"""
msg = None
@ -151,7 +153,8 @@ class PowerMaxMasking(object):
try:
self.move_volume_between_storage_groups(
serial_number, device_id, default_sg_name,
dest_storagegroup, extra_specs)
dest_storagegroup, extra_specs,
parent_sg=parent_sg_name)
except Exception as e:
msg = ("Exception while moving volume from the default "
"storage group to %(sg)s. Exception received was "
@ -270,7 +273,8 @@ class PowerMaxMasking(object):
error_message = self._move_vol_from_default_sg(
serial_number, masking_view_dict[utils.DEVICE_ID],
masking_view_dict[utils.VOL_NAME], default_sg_name,
storagegroup_name, extra_specs)
storagegroup_name, extra_specs,
parent_sg_name=parent_sg_name)
if error_message:
return error_message
@ -517,7 +521,8 @@ class PowerMaxMasking(object):
msg = self._move_vol_from_default_sg(
serial_number, masking_view_dict[utils.DEVICE_ID],
masking_view_dict[utils.VOL_NAME], default_sg_name,
child_sg_name, masking_view_dict[utils.EXTRA_SPECS])
child_sg_name, masking_view_dict[utils.EXTRA_SPECS],
parent_sg_name=sg_from_mv)
if not msg and not check_child:
msg = self._check_add_child_sg_to_parent_sg(
serial_number, child_sg_name, sg_from_mv,
@ -531,7 +536,8 @@ class PowerMaxMasking(object):
"emc-sg-{target_storagegroup_name}-{serial_number}")
def move_volume_between_storage_groups(
self, serial_number, device_id, source_storagegroup_name,
target_storagegroup_name, extra_specs, force=False):
target_storagegroup_name, extra_specs, force=False,
parent_sg=None):
"""Move a volume between storage groups.
:param serial_number: the array serial number
@ -540,7 +546,11 @@ class PowerMaxMasking(object):
:param target_storagegroup_name: the target sg
:param extra_specs: the extra specifications
:param force: optional Force flag required for replicated vols
:param parent_sg: optional Parent storage group
"""
self._check_child_storage_group_exists(
device_id, serial_number, target_storagegroup_name,
extra_specs, parent_sg)
num_vol_in_sg = self.rest.get_num_vols_in_sg(
serial_number, source_storagegroup_name)
LOG.debug("There are %(num_vol)d volumes in the "
@ -735,7 +745,7 @@ class PowerMaxMasking(object):
temp_device_id_list = list_device_id
@coordination.synchronized("emc-sg-{sg_name}-{serial_number}")
def do_add_volume_to_sg(sg_name, serial_number):
def do_add_volumes_to_sg(sg_name, serial_number):
# Check if another process has added any volume to the
# sg while this process was waiting for the lock
volume_list = self.rest.get_volumes_in_storage_group(
@ -750,7 +760,7 @@ class PowerMaxMasking(object):
temp_device_id_list.remove(volume)
self.rest.add_vol_to_sg(serial_number, storagegroup_name,
temp_device_id_list, extra_specs)
do_add_volume_to_sg(storagegroup_name, serial_number)
do_add_volumes_to_sg(storagegroup_name, serial_number)
LOG.debug("Add volumes to storagegroup took: %(delta)s H:MM:SS.",
{'delta': self.utils.get_time_delta(start_time,
@ -1135,6 +1145,9 @@ class PowerMaxMasking(object):
short_host_name = self.utils.get_host_name_label(
connector.get('host'),
host_template) if connector.get('host') else None
# Legacy code
legacy_short_host_name = self.utils.get_host_short_name(
connector.get('host')) if connector.get('host') else None
move = reset
if short_host_name:
for sg_name in storagegroup_names:
@ -1144,6 +1157,13 @@ class PowerMaxMasking(object):
extra_specs, connector, move,
host_template=host_template)
break
elif legacy_short_host_name and (
legacy_short_host_name in sg_name):
self.remove_volume_from_sg(
serial_number, device_id, volume_name, sg_name,
extra_specs, connector, move,
host_template=host_template)
break
else:
for sg_name in storagegroup_names:
self.remove_volume_from_sg(
@ -1707,6 +1727,7 @@ class PowerMaxMasking(object):
return is_deleted
if host is not None:
is_deleted = False
host_label = (self.utils.get_host_name_label(
host, host_template) if host else None)
default_ig_name = self.utils.get_possible_initiator_name(
@ -1715,15 +1736,16 @@ class PowerMaxMasking(object):
if initiatorgroup_name == default_ig_name:
is_deleted = _do_delete_initiator_group(
serial_number, initiatorgroup_name)
if not is_deleted:
host_label = (self.utils.get_host_short_name(
host) if host else None)
default_ig_name = self.utils.get_possible_initiator_name(
host_label, self.protocol)
if initiatorgroup_name == default_ig_name:
_do_delete_initiator_group(
serial_number, initiatorgroup_name)
else:
# Legacy cleanup
legacy_short_host = (self.utils.get_host_short_name(
host) if host else None)
default_ig_name = self.utils.get_possible_initiator_name(
legacy_short_host, self.protocol)
if initiatorgroup_name == default_ig_name:
is_deleted = _do_delete_initiator_group(
serial_number, initiatorgroup_name)
if not is_deleted:
LOG.warning("Initiator group %(ig_name)s was "
"not created by the PowerMax driver so will "
"not be deleted by the PowerMax driver.",
@ -1778,7 +1800,7 @@ class PowerMaxMasking(object):
serial_number, no_slo_sg_name, parent_sg_name, extra_specs)
self.move_volume_between_storage_groups(
serial_number, device_id, fast_source_sg_name,
no_slo_sg_name, extra_specs)
no_slo_sg_name, extra_specs, parent_sg=parent_sg_name)
# Clean up the fast managed group, if required
self._clean_up_child_storage_group(
serial_number, fast_source_sg_name,
@ -1886,7 +1908,8 @@ class PowerMaxMasking(object):
# Add or move volume to fast sg
self._move_vol_from_default_sg(
serial_number, device_id, device_id,
no_slo_sg_name, fast_sg_name, extra_specs)
no_slo_sg_name, fast_sg_name, extra_specs,
parent_sg_name=parent_sg_name)
else:
LOG.debug("Volume already a member of the FAST managed storage "
"group.")
@ -1937,3 +1960,31 @@ class PowerMaxMasking(object):
self._check_ig_rollback(
serial_number, initiator_group_name, connector, force)
def _check_child_storage_group_exists(
self, device_id, serial_number, child_sg_name, extra_specs,
parent_sg_name):
"""Check if the storage group exists.
If the storage group does not exist create it and add it to the
parent
:param device_id: device id
:param serial_number: extra the array serial number
:param child_sg_name: child storage group
:param extra_specs: extra specifications
:param parent_sg_name: parent storage group
"""
disable_compr = self.utils.is_compression_disabled(extra_specs)
mv_dict = {utils.DISABLECOMPRESSION: disable_compr,
utils.VOL_NAME: device_id}
# Get or create the storage group
self._get_or_create_storage_group(
serial_number, mv_dict, child_sg_name, extra_specs)
if parent_sg_name:
if not self.rest.is_child_sg_in_parent_sg(
serial_number, child_sg_name, parent_sg_name):
self.rest.add_child_sg_to_parent_sg(
serial_number, child_sg_name, parent_sg_name,
extra_specs)

View File

@ -0,0 +1,5 @@
---
fixes:
- |
PowerMax Driver - Concurrent live migrations can sometimes fail when one
thread deletes a storage group that another thread may need.