From b300d878b4d82b315ce3dc76cf4dcf0c80f47b1b Mon Sep 17 00:00:00 2001 From: Fernando Ferraz Date: Tue, 19 Jul 2022 05:33:58 -0300 Subject: [PATCH] NetApp ONTAP: Fix SnapMirror snapshots not being cleaned up Replica promote is retaining unneeded snapshots from previous SnapMirror relationships and increasing the amount of space consumed from snapshots in the storage system. This patch fixes the issue by calling the snapmirror release operation after resync completes its transferring, which allows the SnapMirror software to properly cleanup unneeded resources. Closes-Bug: #1982808 Change-Id: I516fb3575e30d18d971d6a1b7f3b9ad7120c3bbd (cherry picked from commit 38b13bb40b879adcc06cef0b0fa629e4b00f7122) (cherry picked from commit e85a081f1c48fa02f728e077e09be08587c7c1e1) (cherry picked from commit 6db352411413818ddd6c02762fda5d67e0219361) (cherry picked from commit 65bcc27100abbbf838ff02735594da5bae3b05c1) --- .../dataontap/cluster_mode/data_motion.py | 30 +++++++++++ .../netapp/dataontap/cluster_mode/lib_base.py | 9 ++++ .../cluster_mode/test_data_motion.py | 54 +++++++++++++++++++ .../dataontap/cluster_mode/test_lib_base.py | 42 +++++++++++++++ ...shots-not-cleaned-up-63cc98cd468adbd1.yaml | 9 ++++ 5 files changed, 144 insertions(+) create mode 100644 releasenotes/notes/bug-1982808-netapp-fix-snapmirror-snapshots-not-cleaned-up-63cc98cd468adbd1.yaml diff --git a/manila/share/drivers/netapp/dataontap/cluster_mode/data_motion.py b/manila/share/drivers/netapp/dataontap/cluster_mode/data_motion.py index fb27994270..88822565c1 100644 --- a/manila/share/drivers/netapp/dataontap/cluster_mode/data_motion.py +++ b/manila/share/drivers/netapp/dataontap/cluster_mode/data_motion.py @@ -838,3 +838,33 @@ class DataMotionSession(object): msg = _("Unable to release the snapmirror from source volume %s. " "Retries exhausted. Aborting") % src_volume_name raise exception.NetAppException(message=msg) + + def cleanup_previous_snapmirror_relationships(self, replica, replica_list): + """Cleanup previous snapmirrors relationships for replica.""" + LOG.debug("Cleaning up old snapmirror relationships for replica %s.", + replica['id']) + src_vol_name, src_vserver, src_backend = ( + self.get_backend_info_for_share(replica)) + src_client = get_client_for_backend(src_backend, + vserver_name=src_vserver) + + # replica_list may contain the replica we are trying to clean up + destinations = (r for r in replica_list if r['id'] != replica['id']) + + for destination in destinations: + dest_vol_name, dest_vserver, _ = ( + self.get_backend_info_for_share(destination)) + try: + src_client.release_snapmirror_vol( + src_vserver, src_vol_name, dest_vserver, dest_vol_name) + except netapp_api.NaApiError as e: + if (e.code == netapp_api.EOBJECTNOTFOUND or + e.code == netapp_api.ESOURCE_IS_DIFFERENT or + "(entry doesn't exist)" in e.message): + LOG.debug( + 'Snapmirror destination %s no longer exists for ' + 'replica %s.', destination['id'], replica['id']) + else: + LOG.exception( + 'Error releasing snapmirror destination %s for ' + 'replica %s.', destination['id'], replica['id']) diff --git a/manila/share/drivers/netapp/dataontap/cluster_mode/lib_base.py b/manila/share/drivers/netapp/dataontap/cluster_mode/lib_base.py index c26331afb3..82db34c16a 100644 --- a/manila/share/drivers/netapp/dataontap/cluster_mode/lib_base.py +++ b/manila/share/drivers/netapp/dataontap/cluster_mode/lib_base.py @@ -2656,6 +2656,15 @@ class NetAppCmodeFileStorageLibrary(object): share_name)): return constants.REPLICA_STATE_OUT_OF_SYNC + # NOTE(sfernand): When promoting replicas, the previous source volume + # and its destinations are put in an 'out of sync' state and must be + # cleaned up once to avoid retaining unused snapshots from the previous + # relationship. Replicas already 'in-sync' won't try another cleanup + # attempt. + if replica['replica_state'] == constants.REPLICA_STATE_OUT_OF_SYNC: + dm_session.cleanup_previous_snapmirror_relationships( + replica, replica_list) + return constants.REPLICA_STATE_IN_SYNC def promote_replica(self, context, replica_list, replica, access_rules, diff --git a/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_data_motion.py b/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_data_motion.py index d7c0107d35..8ec12ee535 100644 --- a/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_data_motion.py +++ b/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_data_motion.py @@ -1179,3 +1179,57 @@ class NetAppCDOTDataMotionSessionTestCase(test.TestCase): src_mock_client.release_snapmirror_vol.assert_called_once_with( fake.VSERVER1, fake.SHARE_NAME, fake.VSERVER2, fake.SHARE_NAME2, relationship_info_only=False) + + @ddt.data([{'id': 'src_share'}, {'id': 'dst_share'}], + [{'id': 'dst_share'}]) + def test_cleanup_previous_snapmirror_relationships(self, replica_list): + mock_src_client = mock.Mock() + src_backend_info = ('src_share', 'src_vserver', 'src_backend') + dst_backend_info = ('dst_share', 'dst_vserver', 'dst_backend') + self.mock_object(self.dm_session, 'get_backend_info_for_share', + mock.Mock(side_effect=[src_backend_info, + dst_backend_info])) + self.mock_object(data_motion, 'get_client_for_backend', + mock.Mock(return_value=mock_src_client)) + self.mock_object(mock_src_client, 'release_snapmirror_vol') + + result = self.dm_session.cleanup_previous_snapmirror_relationships( + {'id': 'src_share'}, replica_list) + + data_motion.get_client_for_backend.assert_called_once_with( + 'src_backend', vserver_name='src_vserver') + self.dm_session.get_backend_info_for_share.assert_has_calls([ + mock.call({'id': 'src_share'}), + mock.call({'id': 'dst_share'}) + ]) + mock_src_client.release_snapmirror_vol.assert_called_once_with( + 'src_vserver', 'src_share', 'dst_vserver', 'dst_share') + + self.assertIsNone(result) + + @ddt.data(netapp_api.NaApiError(), + netapp_api.NaApiError(code=netapp_api.EOBJECTNOTFOUND), + netapp_api.NaApiError(code=netapp_api.ESOURCE_IS_DIFFERENT), + netapp_api.NaApiError(code='some_random_code', + message="(entry doesn't exist)"), + netapp_api.NaApiError(code='some_random_code', + message='(actually, entry does exist!)')) + def test_cleanup_previous_snapmirror_relationships_does_not_exist( + self, release_exception): + mock_src_client = mock.Mock() + self.mock_object(self.dm_session, 'get_backend_info_for_share', + mock.Mock(return_value=( + mock.Mock(), mock.Mock(), mock.Mock()))) + self.mock_object(data_motion, 'get_client_for_backend', + mock.Mock(return_value=mock_src_client)) + self.mock_object(mock_src_client, 'release_snapmirror_vol', + mock.Mock(side_effect=release_exception)) + + replica = {'id': 'src_share'} + replica_list = [replica, {'id': 'dst_share'}] + + result = self.dm_session.cleanup_previous_snapmirror_relationships( + replica, replica_list) + + mock_src_client.release_snapmirror_vol.assert_called() + self.assertIsNone(result) diff --git a/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_lib_base.py b/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_lib_base.py index c83bdeef85..73cb1108f4 100644 --- a/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_lib_base.py +++ b/manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_lib_base.py @@ -4307,6 +4307,46 @@ class NetAppFileStorageLibraryTestCase(test.TestCase): fake.SHARE, None, [], share_server=None) + (self.mock_dm_session.cleanup_previous_snapmirror_relationships + .assert_not_called()) + self.assertEqual(constants.REPLICA_STATE_IN_SYNC, result) + + def test_update_replica_state_replica_change_to_in_sycn(self): + fake_snapmirror = { + 'mirror-state': 'snapmirrored', + 'relationship-status': 'idle', + 'last-transfer-end-timestamp': '%s' % float(time.time()) + } + # fake SHARE has replica_state set to active already + active_replica = fake.SHARE + out_of_sync_replica = copy.deepcopy(fake.SHARE) + out_of_sync_replica['replica_state'] = ( + constants.REPLICA_STATE_OUT_OF_SYNC) + replica_list = [out_of_sync_replica, active_replica] + vserver_client = mock.Mock() + self.mock_object(vserver_client, 'volume_exists', + mock.Mock(return_value=True)) + self.mock_object(self.library, + '_get_vserver', + mock.Mock(return_value=(fake.VSERVER1, + vserver_client))) + self.mock_dm_session.get_snapmirrors = mock.Mock( + return_value=[fake_snapmirror]) + mock_config = mock.Mock() + mock_config.safe_get = mock.Mock(return_value=0) + self.mock_object(data_motion, 'get_backend_configuration', + mock.Mock(return_value=mock_config)) + self.mock_object(self.library, + '_is_readable_replica', + mock.Mock(return_value=False)) + + result = self.library.update_replica_state( + None, replica_list, out_of_sync_replica, + None, [], share_server=None) + + # Expect a snapmirror cleanup as replica was in out of sync state + (self.mock_dm_session.cleanup_previous_snapmirror_relationships + .assert_called_once_with(out_of_sync_replica, replica_list)) self.assertEqual(constants.REPLICA_STATE_IN_SYNC, result) def test_update_replica_state_backend_volume_absent(self): @@ -4350,6 +4390,8 @@ class NetAppFileStorageLibraryTestCase(test.TestCase): fake.SHARE, None, snapshots, share_server=None) + (self.mock_dm_session.cleanup_previous_snapmirror_relationships + .assert_not_called()) self.assertEqual(constants.REPLICA_STATE_IN_SYNC, result) def test_update_replica_state_missing_snapshot(self): diff --git a/releasenotes/notes/bug-1982808-netapp-fix-snapmirror-snapshots-not-cleaned-up-63cc98cd468adbd1.yaml b/releasenotes/notes/bug-1982808-netapp-fix-snapmirror-snapshots-not-cleaned-up-63cc98cd468adbd1.yaml new file mode 100644 index 0000000000..765b52f78a --- /dev/null +++ b/releasenotes/notes/bug-1982808-netapp-fix-snapmirror-snapshots-not-cleaned-up-63cc98cd468adbd1.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + NetApp driver `bug #1982808 + `_: Fixed issue + preventing the storage system from proper clean up unused SnapMirror + snapshots after a replica promote, significantly increasing the amount + of space consumed in ONTAP volumes by snapshots. +