From 1e6eb4c0e91a4fa1e91055e3044cc84ff9fe4520 Mon Sep 17 00:00:00 2001 From: Helen Walsh Date: Thu, 17 Jan 2019 15:30:44 +0000 Subject: [PATCH] PowerMax driver - solve issue of concurrent snapvx operations Stricter locking is required around snapvx operations on the same source volume to prevent concurrency issues, most notibly around the 'create_cloned_volume' api and the use of snapvx generations. Change-Id: Ia509adedfa937b7770de8a9a603d4aac0dce71e7 Closes-Bug: #1812231 --- .../dell_emc/powermax/test_powermax.py | 60 +++++--- .../drivers/dell_emc/powermax/common.py | 137 +++++++++++++----- .../drivers/dell_emc/powermax/metadata.py | 9 +- .../drivers/dell_emc/powermax/provision.py | 78 +++++----- .../volume/drivers/dell_emc/powermax/rest.py | 2 + 5 files changed, 195 insertions(+), 91 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/dell_emc/powermax/test_powermax.py b/cinder/tests/unit/volume/drivers/dell_emc/powermax/test_powermax.py index 94bf8cd42c3..4bff79e5166 100644 --- a/cinder/tests/unit/volume/drivers/dell_emc/powermax/test_powermax.py +++ b/cinder/tests/unit/volume/drivers/dell_emc/powermax/test_powermax.py @@ -101,6 +101,7 @@ class PowerMaxCommonData(object): rdf_managed_async_grp = "OS-%s-Asynchronous-rdf-sg" % rdf_group_name volume_id = '2b06255d-f5f0-4520-a953-b029196add6a' no_slo_sg_name = 'OS-HostX-No_SLO-OS-fibre-PG' + temp_snapvx = 'temp-00001-snapshot_for_clone' # connector info wwpn1 = "123456789012345" @@ -157,6 +158,15 @@ class PowerMaxCommonData(object): 'controller': {'host': '10.00.00.00'}, 'hostlunid': 3} + # snapshot info + snapshot_id = '390eeb4d-0f56-4a02-ba14-167167967014' + snapshot_display_id = 'my_snap' + managed_snap_id = 'OS-390eeb4d-0f56-4a02-ba14-167167967014' + test_snapshot_snap_name = 'OS-' + snapshot_id[:6] + snapshot_id[-9:] + + snap_location = {'snap_name': test_snapshot_snap_name, + 'source_id': device_id} + # cinder volume info ctx = context.RequestContext('admin', 'fake', True) provider_location = {'array': array, @@ -170,6 +180,14 @@ class PowerMaxCommonData(object): provider_location4 = {'array': six.text_type(uni_array), 'device_id': device_id} + provider_location_clone = {'array': array, + 'device_id': device_id, + 'snap_name': temp_snapvx, + 'source_device_id': device_id} + provider_location_snapshot = {'array': array, + 'device_id': device_id, + 'snap_name': test_snapshot_snap_name, + 'source_device_id': device_id} provider_location5 = {'array': remote_array, 'device_id': device_id} @@ -211,8 +229,6 @@ class PowerMaxCommonData(object): replication_driver_data=six.text_type(legacy_provider_location2), host=fake_host, volume_type=test_volume_type) - snapshot_id = '390eeb4d-0f56-4a02-ba14-167167967014' - test_clone_volume = fake_volume.fake_volume_obj( context=ctx, name='vol1', size=2, provider_auth=None, provider_location=six.text_type(provider_location2), @@ -226,13 +242,6 @@ class PowerMaxCommonData(object): volume_type=test_volume_type, host=fake_host, replication_driver_data=six.text_type(provider_location4)) - snapshot_display_id = 'my_snap' - managed_snap_id = 'OS-390eeb4d-0f56-4a02-ba14-167167967014' - test_snapshot_snap_name = 'OS-' + snapshot_id[:6] + snapshot_id[-9:] - - snap_location = {'snap_name': test_snapshot_snap_name, - 'source_id': device_id} - test_snapshot = fake_snapshot.fake_snapshot_obj( context=ctx, id=snapshot_id, name='my_snap', size=2, @@ -1494,7 +1503,7 @@ class PowerMaxUtilsTest(test.TestCase): def test_get_temp_snap_name(self): source_device_id = self.data.device_id - ref_name = "temp-00001-snapshot_for_clone" + ref_name = self.data.temp_snapvx snap_name = self.utils.get_temp_snap_name(source_device_id) self.assertEqual(ref_name, snap_name) @@ -4121,22 +4130,31 @@ class PowerMaxCommonTest(test.TestCase): def test_create_volume_from_snapshot(self): ref_model_update = ( {'provider_location': six.text_type( - self.data.provider_location)}) + deepcopy(self.data.provider_location_snapshot))}) model_update = self.common.create_volume_from_snapshot( self.data.test_clone_volume, self.data.test_snapshot) - self.assertEqual(ref_model_update, model_update) + self.assertEqual( + ast.literal_eval(ref_model_update['provider_location']), + ast.literal_eval(model_update['provider_location'])) # Test from legacy snapshot + ref_model_update = ( + {'provider_location': six.text_type( + deepcopy(self.data.provider_location_clone))}) model_update = self.common.create_volume_from_snapshot( self.data.test_clone_volume, self.data.test_legacy_snapshot) - self.assertEqual(ref_model_update, model_update) + self.assertEqual( + ast.literal_eval(ref_model_update['provider_location']), + ast.literal_eval(model_update['provider_location'])) def test_cloned_volume(self): ref_model_update = ( {'provider_location': six.text_type( - self.data.provider_location)}) + self.data.provider_location_clone)}) model_update = self.common.create_cloned_volume( self.data.test_clone_volume, self.data.test_volume) - self.assertEqual(ref_model_update, model_update) + self.assertEqual( + ast.literal_eval(ref_model_update['provider_location']), + ast.literal_eval(model_update['provider_location'])) def test_delete_volume(self): with mock.patch.object(self.common, '_delete_volume') as mock_delete: @@ -4651,7 +4669,7 @@ class PowerMaxCommonTest(test.TestCase): volume = self.data.test_clone_volume source_volume = self.data.test_volume extra_specs = self.data.extra_specs - ref_dict = self.data.provider_location + ref_dict = self.data.provider_location_clone clone_dict = self.common._create_cloned_volume( volume, source_volume, extra_specs) self.assertEqual(ref_dict, clone_dict) @@ -4669,7 +4687,7 @@ class PowerMaxCommonTest(test.TestCase): volume = self.data.test_clone_volume source_volume = self.data.test_snapshot extra_specs = self.data.extra_specs - ref_dict = self.data.provider_location + ref_dict = self.data.provider_location_snapshot clone_dict = self.common._create_cloned_volume( volume, source_volume, extra_specs, False, True) self.assertEqual(ref_dict, clone_dict) @@ -5043,7 +5061,7 @@ class PowerMaxCommonTest(test.TestCase): clone_volume = self.data.test_clone_volume source_device_id = self.data.device_id snap_name = self.data.snap_location['snap_name'] - ref_dict = self.data.provider_location + ref_dict = self.data.provider_location_snapshot clone_dict = self.common._create_replica( array, clone_volume, source_device_id, self.data.extra_specs, snap_name) @@ -5054,7 +5072,7 @@ class PowerMaxCommonTest(test.TestCase): clone_volume = self.data.test_clone_volume source_device_id = self.data.device_id snap_name = "temp-" + source_device_id + "-snapshot_for_clone" - ref_dict = self.data.provider_location + ref_dict = self.data.provider_location_clone with mock.patch.object( self.utils, 'get_temp_snap_name', return_value=snap_name) as mock_get_snap: @@ -8254,7 +8272,7 @@ class PowerMaxCommonReplicationTest(test.TestCase): return_value=({}, {})) as mock_rep: self.common.create_cloned_volume( self.data.test_clone_volume, self.data.test_volume) - volume_dict = self.data.provider_location + volume_dict = self.data.provider_location_clone mock_rep.assert_called_once_with( self.data.test_clone_volume, self.data.test_clone_volume.name, volume_dict, extra_specs) @@ -8266,7 +8284,7 @@ class PowerMaxCommonReplicationTest(test.TestCase): return_value=({}, {})) as mock_rep: self.common.create_volume_from_snapshot( self.data.test_clone_volume, self.data.test_snapshot) - volume_dict = self.data.provider_location + volume_dict = self.data.provider_location_snapshot mock_rep.assert_called_once_with( self.data.test_clone_volume, "snapshot-%s" % self.data.snapshot_id, diff --git a/cinder/volume/drivers/dell_emc/powermax/common.py b/cinder/volume/drivers/dell_emc/powermax/common.py index 7fef5d99ff1..0c6f13af215 100644 --- a/cinder/volume/drivers/dell_emc/powermax/common.py +++ b/cinder/volume/drivers/dell_emc/powermax/common.py @@ -449,7 +449,7 @@ class PowerMaxCommon(object): self.volume_metadata.capture_create_volume( volume_dict['device_id'], volume, group_name, group_id, - extra_specs, rep_info_dict, 'create', None) + extra_specs, rep_info_dict, 'create') LOG.info("Leaving create_volume: %(name)s. Volume dict: %(dict)s.", {'name': volume_name, 'dict': volume_dict}) @@ -514,7 +514,7 @@ class PowerMaxCommon(object): self.volume_metadata.capture_create_volume( clone_dict['device_id'], volume, None, None, extra_specs, rep_info_dict, 'createFromSnapshot', - snapshot.id) + source_snapshot_id=snapshot.id) return model_update @@ -541,7 +541,8 @@ class PowerMaxCommon(object): self.volume_metadata.capture_create_volume( clone_dict['device_id'], clone_volume, None, None, extra_specs, rep_info_dict, 'createFromVolume', - None) + temporary_snapvx=clone_dict.get('snap_name'), + source_device_id=clone_dict.get('source_device_id')) return model_update def _replicate_volume(self, volume, volume_name, volume_dict, extra_specs, @@ -620,13 +621,10 @@ class PowerMaxCommon(object): elif not sourcedevice_id or not snap_name: LOG.info("No snapshot found on the array") else: - @coordination.synchronized("emc-source-{sourcedevice_id}") - def do_delete_volume_snap_check_for_links(sourcedevice_id): - # Ensure snap has not been recently deleted - self.provision.delete_volume_snap_check_for_links( - extra_specs[utils.ARRAY], snap_name, - sourcedevice_id, extra_specs) - do_delete_volume_snap_check_for_links(sourcedevice_id) + # Ensure snap has not been recently deleted + self.provision.delete_volume_snap_check_for_links( + extra_specs[utils.ARRAY], snap_name, + sourcedevice_id, extra_specs) LOG.info("Leaving delete_snapshot: %(ssname)s.", {'ssname': snap_name}) @@ -1619,8 +1617,14 @@ class PowerMaxCommon(object): :param volume: volume object to be deleted :returns: volume_name (string vol name) """ + source_device_id = None volume_name = volume.name extra_specs = self._initial_setup(volume) + prov_loc = volume.provider_location + + if isinstance(prov_loc, six.string_types): + name = ast.literal_eval(prov_loc) + source_device_id = name.get('source_device_id') device_id = self._find_device_on_array(volume, extra_specs) if device_id is None: @@ -1632,7 +1636,8 @@ class PowerMaxCommon(object): array = extra_specs[utils.ARRAY] # Check if the volume being deleted is a # source or target for copy session - self._sync_check(array, device_id, extra_specs) + self._sync_check(array, device_id, extra_specs, + source_device_id=source_device_id) # Remove from any storage groups and cleanup replication self._remove_vol_and_cleanup_replication( array, device_id, volume_name, extra_specs, volume) @@ -2020,6 +2025,9 @@ class PowerMaxCommon(object): clone_name, snap_name, extra_specs) # Re-throw the exception. raise + # add source id and snap_name to the clone dict + clone_dict['source_device_id'] = source_device_id + clone_dict['snap_name'] = snap_name return clone_dict def _cleanup_target( @@ -2044,14 +2052,39 @@ class PowerMaxCommon(object): array, target_device_id, clone_name, extra_specs) def _sync_check(self, array, device_id, extra_specs, - tgt_only=False): + tgt_only=False, source_device_id=None): """Check if volume is part of a SnapVx sync process. :param array: the array serial number :param device_id: volume instance :param tgt_only: Flag - return only sessions where device is target :param extra_specs: extra specifications - :param is_clone: Flag to specify if it is a clone operation + :param tgt_only: Flag to specify if it is a target + :param source_device_id: source_device_id if it has one + """ + if not source_device_id and tgt_only: + source_device_id = self._get_target_source_device( + array, device_id, tgt_only) + if source_device_id: + @coordination.synchronized("emc-source-{source_device_id}") + def do_unlink_and_delete_snap(source_device_id): + self._do_sync_check( + array, device_id, extra_specs, tgt_only) + + do_unlink_and_delete_snap(source_device_id) + else: + self._do_sync_check( + array, device_id, extra_specs, tgt_only) + + def _do_sync_check( + self, array, device_id, extra_specs, tgt_only=False): + """Check if volume is part of a SnapVx sync process. + + :param array: the array serial number + :param device_id: volume instance + :param tgt_only: Flag - return only sessions where device is target + :param extra_specs: extra specifications + :param tgt_only: Flag to specify if it is a target """ get_sessions = False snapvx_tgt, snapvx_src, __ = self.rest.is_vol_in_rep_session( @@ -2074,8 +2107,10 @@ class PowerMaxCommon(object): # Break the replication relationship for target in targets: LOG.debug("Unlinking source from target. Source: " - "%(volume)s, Target: %(target)s.", - {'volume': source, 'target': target[0]}) + "%(volume)s, Target: %(target)s, " + "generation: %(generation)s.", + {'volume': source, 'target': target[0], + 'generation': generation}) self.provision.break_replication_relationship( array, target[0], source, snap_name, extra_specs, generation) @@ -2083,11 +2118,43 @@ class PowerMaxCommon(object): # legacy volumes) if it is a temporary volume. # Only then is it a candidate for deletion. if 'temp' in snap_name or 'EMC_SMI' in snap_name: - @coordination.synchronized("emc-source-{source}") - def do_delete_temp_volume_snap(source): - self.provision.delete_temp_volume_snap( - array, snap_name, source, generation) - do_delete_temp_volume_snap(source) + LOG.debug("Deleting temporary snapshot. Source: " + "%(volume)s, snap name: %(snap_name)s, " + "generation: %(generation)s.", + {'volume': source, 'snap_name': snap_name, + 'generation': generation}) + self.provision.delete_temp_volume_snap( + array, snap_name, source, generation) + + def _get_target_source_device( + self, array, device_id, tgt_only=False): + """Get the source device id of the target. + + :param array: the array serial number + :param device_id: volume instance + :param tgt_only: Flag - return only sessions where device is target + return source_device_id + """ + LOG.debug("Getting source device id from target %(target)s.", + {'target': device_id}) + get_sessions = False + source_device_id = None + snapvx_tgt, snapvx_src, __ = self.rest.is_vol_in_rep_session( + array, device_id) + if snapvx_tgt: + get_sessions = True + elif snapvx_src and not tgt_only: + get_sessions = True + if get_sessions: + snap_vx_sessions = self.rest.find_snap_vx_sessions( + array, device_id, tgt_only) + if snap_vx_sessions: + snap_vx_sessions.sort( + key=lambda k: k['generation'], reverse=True) + for session in snap_vx_sessions: + source_device_id = session['source_vol'] + break + return source_device_id def _clone_check(self, array, device_id, extra_specs): """Perform any snapvx cleanup before creating clones or snapshots @@ -2099,14 +2166,18 @@ class PowerMaxCommon(object): snapvx_tgt, snapvx_src, __ = self.rest.is_vol_in_rep_session( array, device_id) if snapvx_src or snapvx_tgt: - snap_vx_sessions = self.rest.find_snap_vx_sessions( - array, device_id) - if snap_vx_sessions: - snap_vx_sessions.sort( - key=lambda k: k['generation'], reverse=True) - self._break_relationship( - snap_vx_sessions, snapvx_tgt, snapvx_src, array, - extra_specs) + @coordination.synchronized("emc-source-{src_device_id}") + def do_unlink_and_delete_snap(src_device_id): + snap_vx_sessions = self.rest.find_snap_vx_sessions( + array, src_device_id) + if snap_vx_sessions: + snap_vx_sessions.sort( + key=lambda k: k['generation'], reverse=True) + self._break_relationship( + snap_vx_sessions, snapvx_tgt, src_device_id, array, + extra_specs) + + do_unlink_and_delete_snap(device_id) def _break_relationship( self, snap_vx_sessions, snapvx_tgt, snapvx_src, array, @@ -2171,13 +2242,9 @@ class PowerMaxCommon(object): # For older styled temp snapshots for clone # do a delete as well if is_temp: - @coordination.synchronized( - "emc-source-{source}") - def do_delete_temp_volume_snap(source): - self.provision.delete_temp_volume_snap( - array, session['snap_name'], source, session['generation']) - - do_delete_temp_volume_snap(session['source_vol']) + self.provision.delete_temp_volume_snap( + array, session['snap_name'], session['source_vol'], + session['generation']) def manage_existing(self, volume, external_ref): """Manages an existing PowerMax/VMAX Volume (import to Cinder). diff --git a/cinder/volume/drivers/dell_emc/powermax/metadata.py b/cinder/volume/drivers/dell_emc/powermax/metadata.py index 0a013352262..e2e90881211 100644 --- a/cinder/volume/drivers/dell_emc/powermax/metadata.py +++ b/cinder/volume/drivers/dell_emc/powermax/metadata.py @@ -446,7 +446,8 @@ class PowerMaxVolumeMetadata(object): @debug_required def capture_create_volume( self, device_id, volume, group_name, group_id, extra_specs, - rep_info_dict, successful_operation, source_snapshot_id): + rep_info_dict, successful_operation, source_snapshot_id=None, + source_device_id=None, temporary_snapvx=None): """Captures create volume info in volume metadata :param device_id: device id @@ -457,6 +458,7 @@ class PowerMaxVolumeMetadata(object): :param rep_info_dict: information gathered from replication :param successful_operation: the type of create operation :param source_snapshot_id: the source snapshot id + :returns: volume_metadata dict """ rdf_group_no, target_name, remote_array, target_device_id = ( @@ -496,7 +498,10 @@ class PowerMaxVolumeMetadata(object): rdf_group_label=rdf_group_label, use_bias=use_bias, is_compression_disabled=( 'yes' if self.utils.is_compression_disabled( - extra_specs) else 'no') + extra_specs) else 'no'), + source_device_id=source_device_id, + temporary_snapvx=temporary_snapvx + ) volume_metadata = self.update_volume_info_metadata( datadict, self.version_dict) diff --git a/cinder/volume/drivers/dell_emc/powermax/provision.py b/cinder/volume/drivers/dell_emc/powermax/provision.py index 12c8ef66410..1ad38500915 100644 --- a/cinder/volume/drivers/dell_emc/powermax/provision.py +++ b/cinder/volume/drivers/dell_emc/powermax/provision.py @@ -129,14 +129,19 @@ class PowerMaxProvision(object): :param extra_specs: the extra specifications :param ttl: time to live in hours, defaults to 0 """ - start_time = time.time() - LOG.debug("Create Snap Vx snapshot of: %(source)s.", - {'source': source_device_id}) - self.rest.create_volume_snap( - array, snap_name, source_device_id, extra_specs, ttl) - LOG.debug("Create volume snapVx took: %(delta)s H:MM:SS.", - {'delta': self.utils.get_time_delta(start_time, - time.time())}) + @coordination.synchronized("emc-snapvx-{src_device_id}") + def do_create_volume_snap(src_device_id): + start_time = time.time() + LOG.debug("Create Snap Vx snapshot of: %(source)s.", + {'source': src_device_id}) + + self.rest.create_volume_snap( + array, snap_name, src_device_id, extra_specs, ttl) + LOG.debug("Create volume snapVx took: %(delta)s H:MM:SS.", + {'delta': self.utils.get_time_delta(start_time, + time.time())}) + + do_create_volume_snap(source_device_id) def create_volume_replica( self, array, source_device_id, target_device_id, @@ -156,9 +161,14 @@ class PowerMaxProvision(object): self.create_volume_snapvx(array, source_device_id, snap_name, extra_specs, ttl=1) # Link source to target - self.rest.modify_volume_snap( - array, source_device_id, target_device_id, snap_name, - extra_specs, link=True) + + @coordination.synchronized("emc-snapvx-{src_device_id}") + def do_modify_volume_snap(src_device_id): + self.rest.modify_volume_snap( + array, src_device_id, target_device_id, snap_name, + extra_specs, link=True) + + do_modify_volume_snap(source_device_id) LOG.debug("Create element replica took: %(delta)s H:MM:SS.", {'delta': self.utils.get_time_delta(start_time, @@ -176,13 +186,17 @@ class PowerMaxProvision(object): :param extra_specs: extra specifications :param generation: the generation number of the snapshot """ - LOG.debug("Break snap vx link relationship between: %(src)s " - "and: %(tgt)s.", - {'src': source_device_id, 'tgt': target_device_id}) + @coordination.synchronized("emc-snapvx-{src_device_id}") + def do_unlink_volume(src_device_id): + LOG.debug("Break snap vx link relationship between: %(src)s " + "and: %(tgt)s.", + {'src': src_device_id, 'tgt': target_device_id}) - self._unlink_volume(array, source_device_id, target_device_id, - snap_name, extra_specs, - list_volume_pairs=None, generation=generation) + self._unlink_volume(array, src_device_id, target_device_id, + snap_name, extra_specs, + list_volume_pairs=None, generation=generation) + + do_unlink_volume(source_device_id) def _unlink_volume( self, array, source_device_id, target_device_id, snap_name, @@ -198,7 +212,6 @@ class PowerMaxProvision(object): :param generation: the generation number of the snapshot :return: return code """ - def _unlink_vol(): """Called at an interval until the synchronization is finished. @@ -240,10 +253,14 @@ class PowerMaxProvision(object): :param restored: Flag to indicate if restored session is being deleted :param generation: the snapshot generation number """ - LOG.debug("Delete SnapVx: %(snap_name)s for volume %(vol)s.", - {'vol': source_device_id, 'snap_name': snap_name}) - self.rest.delete_volume_snap( - array, snap_name, source_device_id, restored, generation) + @coordination.synchronized("emc-snapvx-{src_device_id}") + def do_delete_volume_snap(src_device_id): + LOG.debug("Delete SnapVx: %(snap_name)s for volume %(vol)s.", + {'vol': src_device_id, 'snap_name': snap_name}) + self.rest.delete_volume_snap( + array, snap_name, src_device_id, restored, generation) + + do_delete_volume_snap(source_device_id) def is_restore_complete(self, array, source_device_id, snap_name, extra_specs): @@ -323,17 +340,12 @@ class PowerMaxProvision(object): :param source_device_id: the source device id :param generation: the generation number for the snapshot """ - - @coordination.synchronized("emc-snapvx-{snapvx_name}") - def do_delete_temp_snap(snapvx_name): - # Ensure snap has not been recently deleted - if self.rest.get_volume_snap( - array, source_device_id, snapvx_name, generation): - self.delete_volume_snap( - array, snapvx_name, source_device_id, - restored=False, generation=generation) - - do_delete_temp_snap(snap_name) + snapvx = self.rest.get_volume_snap( + array, source_device_id, snap_name, generation) + if snapvx: + self.delete_volume_snap( + array, snap_name, source_device_id, + restored=False, generation=generation) def delete_volume_snap_check_for_links( self, array, snap_name, source_devices, extra_specs, generation=0): diff --git a/cinder/volume/drivers/dell_emc/powermax/rest.py b/cinder/volume/drivers/dell_emc/powermax/rest.py index 580fd003fc8..8440c172074 100644 --- a/cinder/volume/drivers/dell_emc/powermax/rest.py +++ b/cinder/volume/drivers/dell_emc/powermax/rest.py @@ -1816,6 +1816,8 @@ class PowerMaxRest(object): "generation": int(generation)} if restored: payload.update({"restore": True}) + LOG.debug("The payload is %(payload)s.", + {'payload': payload}) return self.delete_resource( array, REPLICATION, 'snapshot', snap_name, payload=payload, private='/private')