Browse Source

Merge "[NetApp] Fix falsely report migration cancelation success" into stable/ussuri

tags/10.0.1
Zuul 2 weeks ago
committed by Gerrit Code Review
parent
commit
4f77cf1826
4 changed files with 103 additions and 14 deletions
  1. +29
    -0
      manila/share/drivers/netapp/dataontap/cluster_mode/lib_base.py
  2. +7
    -1
      manila/share/drivers/netapp/options.py
  3. +47
    -13
      manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_lib_base.py
  4. +20
    -0
      releasenotes/notes/bug-1688620-netapp-migration-cancelation-fb913131eb8eb82a.yaml

+ 29
- 0
manila/share/drivers/netapp/dataontap/cluster_mode/lib_base.py View File

@@ -2534,6 +2534,8 @@ class NetAppCmodeFileStorageLibrary(object):
"""Abort an ongoing migration."""
vserver, vserver_client = self._get_vserver(share_server=share_server)
share_volume = self._get_backend_share_name(source_share['id'])
retries = (self.configuration.netapp_migration_cancel_timeout / 5 or
1)

try:
self._get_volume_move_status(source_share, share_server)
@@ -2543,6 +2545,33 @@ class NetAppCmodeFileStorageLibrary(object):

self._client.abort_volume_move(share_volume, vserver)

@manila_utils.retry(exception.InUse, interval=5,
retries=retries, backoff_rate=1)
def wait_for_migration_cancel_complete():
move_status = self._get_volume_move_status(source_share,
share_server)
if move_status['state'] == 'failed':
return
else:
msg = "Migration cancelation isn't finished yet."
raise exception.InUse(message=msg)

try:
wait_for_migration_cancel_complete()
except exception.InUse:
move_status = self._get_volume_move_status(source_share,
share_server)
msg_args = {
'share_move_state': move_status['state']
}
msg = _("Migration cancelation was not successful. The share "
"migration state failed while transitioning from "
"%(share_move_state)s state to 'failed'. Retries "
"exhausted.") % msg_args
raise exception.NetAppException(message=msg)
except exception.NetAppException:
LOG.exception("Could not get volume move status.")

msg = ("Share volume move operation for share %(shr)s from host "
"%(src)s to %(dest)s was successfully aborted.")
msg_args = {


+ 7
- 1
manila/share/drivers/netapp/options.py View File

@@ -156,7 +156,13 @@ netapp_data_motion_opts = [
default=3600, # One Hour,
help='The maximum time in seconds to wait for the completion '
'of a volume clone split operation in order to start a '
'volume move.'), ]
'volume move.'),
cfg.IntOpt('netapp_migration_cancel_timeout',
min=0,
default=3600, # One Hour,
help='The maximum time in seconds that migration cancel '
'waits for all migration operations be completely '
'aborted.'), ]

CONF = cfg.CONF
CONF.register_opts(netapp_proxy_opts)


+ 47
- 13
manila/tests/share/drivers/netapp/dataontap/cluster_mode/test_lib_base.py View File

@@ -5242,35 +5242,69 @@ class NetAppFileStorageLibraryTestCase(test.TestCase):
self.assertDictMatch(expected_progress, migration_progress)
mock_info_log.assert_called_once()

@ddt.data(utils.annotated('already_canceled', (True, )),
utils.annotated('not_canceled_yet', (False, )))
def test_migration_cancel(self, already_canceled):
@ddt.data({'state': 'failed'},
{'state': 'healthy'})
@ddt.unpack
def test_migration_cancel(self, state):
source_snapshots = mock.Mock()
snapshot_mappings = mock.Mock()
already_canceled = already_canceled[0]
mock_exception_log = self.mock_object(lib_base.LOG, 'exception')
self.library.configuration.netapp_migration_cancel_timeout = 15
mock_info_log = self.mock_object(lib_base.LOG, 'info')
vol_move_side_effect = (exception.NetAppException
if already_canceled else None)

self.mock_object(self.library, '_get_vserver',
mock.Mock(return_value=(fake.VSERVER1, mock.Mock())))
self.mock_object(self.library, '_get_backend_share_name',
mock.Mock(return_value=fake.SHARE_NAME))
self.mock_object(self.client, 'abort_volume_move')
self.mock_object(self.client, 'get_volume_move_status',
mock.Mock(side_effect=vol_move_side_effect))
mock.Mock(return_value={'state': state}))

retval = self.library.migration_cancel(
if state == 'failed':
retval = self.library.migration_cancel(
self.context, fake_share.fake_share_instance(),
fake_share.fake_share_instance(), source_snapshots,
snapshot_mappings, share_server=fake.SHARE_SERVER,
destination_share_server='dst_srv')

self.assertIsNone(retval)
mock_info_log.assert_called_once()
else:
self.assertRaises(
(exception.NetAppException),
self.library.migration_cancel, self.context,
fake_share.fake_share_instance(),
fake_share.fake_share_instance, source_snapshots,
snapshot_mappings, share_server=fake.SHARE_SERVER,
destination_share_server='dst_srv')

@ddt.data({'already_canceled': True, 'effect': exception.NetAppException},
{'already_canceled': False, 'effect':
(None, exception.NetAppException)})
@ddt.unpack
def test_migration_cancel_exception_volume_status(self, already_canceled,
effect):
source_snapshots = mock.Mock()
snapshot_mappings = mock.Mock()
self.library.configuration.netapp_migration_cancel_timeout = 1
mock_exception_log = self.mock_object(lib_base.LOG, 'exception')
mock_info_log = self.mock_object(lib_base.LOG, 'info')
self.mock_object(self.library, '_get_vserver',
mock.Mock(return_value=(fake.VSERVER1, mock.Mock())))
self.mock_object(self.library, '_get_backend_share_name',
mock.Mock(return_value=fake.SHARE_NAME))
self.mock_object(self.client, 'abort_volume_move')
self.mock_object(self.client, 'get_volume_move_status',
mock.Mock(side_effect=effect))
self.library.migration_cancel(
self.context, fake_share.fake_share_instance(),
fake_share.fake_share_instance(), source_snapshots,
snapshot_mappings, share_server=fake.SHARE_SERVER,
destination_share_server='dst_srv')

self.assertIsNone(retval)
if already_canceled:
mock_exception_log.assert_called_once()
else:
mock_exception_log.assert_called_once()
if not already_canceled:
mock_info_log.assert_called_once()

self.assertEqual(not already_canceled,
self.client.abort_volume_move.called)



+ 20
- 0
releasenotes/notes/bug-1688620-netapp-migration-cancelation-fb913131eb8eb82a.yaml View File

@@ -0,0 +1,20 @@
---
fixes:
- |
NetApp ONTAP share delete operation can fail sometimes when is triggered
immediately after migration cancelation on a overloaded NetApp backend.
Canceling an ongoing migration is an asynchronous operation on an
ONTAP storage system.
Now the NetApp driver checks if the asynchronous API has ended its
operation before reporting migration cancelation success. If the operation
of the asynchronous API did not end within the specified timeout, the
``migration cancel`` cancel operation will be considered unsuccessful.
To do so, a new configuration option ``netapp_migration_cancel_timeout``
has been added.
upgrade:
- |
The configuration option ``netapp_migration_cancel_timeout`` can be
specified in the NetApp backend section to redefine the amount of time
that the NetApp driver must attempt to wait on the asynchronous
operation to cancel an ongoing migration. This option is set to 3600
seconds by default, which is sufficient time in most cases.

Loading…
Cancel
Save