Merge "[NetApp] Fix falsely report migration cancelation success" into stable/ussuri

This commit is contained in:
Zuul 2020-07-20 22:21:54 +00:00 committed by Gerrit Code Review
commit 4f77cf1826
4 changed files with 103 additions and 14 deletions

View File

@ -2534,6 +2534,8 @@ class NetAppCmodeFileStorageLibrary(object):
"""Abort an ongoing migration.""" """Abort an ongoing migration."""
vserver, vserver_client = self._get_vserver(share_server=share_server) vserver, vserver_client = self._get_vserver(share_server=share_server)
share_volume = self._get_backend_share_name(source_share['id']) share_volume = self._get_backend_share_name(source_share['id'])
retries = (self.configuration.netapp_migration_cancel_timeout / 5 or
1)
try: try:
self._get_volume_move_status(source_share, share_server) self._get_volume_move_status(source_share, share_server)
@ -2543,6 +2545,33 @@ class NetAppCmodeFileStorageLibrary(object):
self._client.abort_volume_move(share_volume, vserver) self._client.abort_volume_move(share_volume, vserver)
@manila_utils.retry(exception.InUse, interval=5,
retries=retries, backoff_rate=1)
def wait_for_migration_cancel_complete():
move_status = self._get_volume_move_status(source_share,
share_server)
if move_status['state'] == 'failed':
return
else:
msg = "Migration cancelation isn't finished yet."
raise exception.InUse(message=msg)
try:
wait_for_migration_cancel_complete()
except exception.InUse:
move_status = self._get_volume_move_status(source_share,
share_server)
msg_args = {
'share_move_state': move_status['state']
}
msg = _("Migration cancelation was not successful. The share "
"migration state failed while transitioning from "
"%(share_move_state)s state to 'failed'. Retries "
"exhausted.") % msg_args
raise exception.NetAppException(message=msg)
except exception.NetAppException:
LOG.exception("Could not get volume move status.")
msg = ("Share volume move operation for share %(shr)s from host " msg = ("Share volume move operation for share %(shr)s from host "
"%(src)s to %(dest)s was successfully aborted.") "%(src)s to %(dest)s was successfully aborted.")
msg_args = { msg_args = {

View File

@ -156,7 +156,13 @@ netapp_data_motion_opts = [
default=3600, # One Hour, default=3600, # One Hour,
help='The maximum time in seconds to wait for the completion ' help='The maximum time in seconds to wait for the completion '
'of a volume clone split operation in order to start a ' 'of a volume clone split operation in order to start a '
'volume move.'), ] 'volume move.'),
cfg.IntOpt('netapp_migration_cancel_timeout',
min=0,
default=3600, # One Hour,
help='The maximum time in seconds that migration cancel '
'waits for all migration operations be completely '
'aborted.'), ]
CONF = cfg.CONF CONF = cfg.CONF
CONF.register_opts(netapp_proxy_opts) CONF.register_opts(netapp_proxy_opts)

View File

@ -5242,35 +5242,69 @@ class NetAppFileStorageLibraryTestCase(test.TestCase):
self.assertDictMatch(expected_progress, migration_progress) self.assertDictMatch(expected_progress, migration_progress)
mock_info_log.assert_called_once() mock_info_log.assert_called_once()
@ddt.data(utils.annotated('already_canceled', (True, )), @ddt.data({'state': 'failed'},
utils.annotated('not_canceled_yet', (False, ))) {'state': 'healthy'})
def test_migration_cancel(self, already_canceled): @ddt.unpack
def test_migration_cancel(self, state):
source_snapshots = mock.Mock() source_snapshots = mock.Mock()
snapshot_mappings = mock.Mock() snapshot_mappings = mock.Mock()
already_canceled = already_canceled[0] self.library.configuration.netapp_migration_cancel_timeout = 15
mock_exception_log = self.mock_object(lib_base.LOG, 'exception')
mock_info_log = self.mock_object(lib_base.LOG, 'info') mock_info_log = self.mock_object(lib_base.LOG, 'info')
vol_move_side_effect = (exception.NetAppException
if already_canceled else None)
self.mock_object(self.library, '_get_vserver', self.mock_object(self.library, '_get_vserver',
mock.Mock(return_value=(fake.VSERVER1, mock.Mock()))) mock.Mock(return_value=(fake.VSERVER1, mock.Mock())))
self.mock_object(self.library, '_get_backend_share_name', self.mock_object(self.library, '_get_backend_share_name',
mock.Mock(return_value=fake.SHARE_NAME)) mock.Mock(return_value=fake.SHARE_NAME))
self.mock_object(self.client, 'abort_volume_move') self.mock_object(self.client, 'abort_volume_move')
self.mock_object(self.client, 'get_volume_move_status', self.mock_object(self.client, 'get_volume_move_status',
mock.Mock(side_effect=vol_move_side_effect)) mock.Mock(return_value={'state': state}))
retval = self.library.migration_cancel( if state == 'failed':
retval = self.library.migration_cancel(
self.context, fake_share.fake_share_instance(),
fake_share.fake_share_instance(), source_snapshots,
snapshot_mappings, share_server=fake.SHARE_SERVER,
destination_share_server='dst_srv')
self.assertIsNone(retval)
mock_info_log.assert_called_once()
else:
self.assertRaises(
(exception.NetAppException),
self.library.migration_cancel, self.context,
fake_share.fake_share_instance(),
fake_share.fake_share_instance, source_snapshots,
snapshot_mappings, share_server=fake.SHARE_SERVER,
destination_share_server='dst_srv')
@ddt.data({'already_canceled': True, 'effect': exception.NetAppException},
{'already_canceled': False, 'effect':
(None, exception.NetAppException)})
@ddt.unpack
def test_migration_cancel_exception_volume_status(self, already_canceled,
effect):
source_snapshots = mock.Mock()
snapshot_mappings = mock.Mock()
self.library.configuration.netapp_migration_cancel_timeout = 1
mock_exception_log = self.mock_object(lib_base.LOG, 'exception')
mock_info_log = self.mock_object(lib_base.LOG, 'info')
self.mock_object(self.library, '_get_vserver',
mock.Mock(return_value=(fake.VSERVER1, mock.Mock())))
self.mock_object(self.library, '_get_backend_share_name',
mock.Mock(return_value=fake.SHARE_NAME))
self.mock_object(self.client, 'abort_volume_move')
self.mock_object(self.client, 'get_volume_move_status',
mock.Mock(side_effect=effect))
self.library.migration_cancel(
self.context, fake_share.fake_share_instance(), self.context, fake_share.fake_share_instance(),
fake_share.fake_share_instance(), source_snapshots, fake_share.fake_share_instance(), source_snapshots,
snapshot_mappings, share_server=fake.SHARE_SERVER, snapshot_mappings, share_server=fake.SHARE_SERVER,
destination_share_server='dst_srv') destination_share_server='dst_srv')
self.assertIsNone(retval) mock_exception_log.assert_called_once()
if already_canceled: if not already_canceled:
mock_exception_log.assert_called_once()
else:
mock_info_log.assert_called_once() mock_info_log.assert_called_once()
self.assertEqual(not already_canceled, self.assertEqual(not already_canceled,
self.client.abort_volume_move.called) self.client.abort_volume_move.called)

View File

@ -0,0 +1,20 @@
---
fixes:
- |
NetApp ONTAP share delete operation can fail sometimes when is triggered
immediately after migration cancelation on a overloaded NetApp backend.
Canceling an ongoing migration is an asynchronous operation on an
ONTAP storage system.
Now the NetApp driver checks if the asynchronous API has ended its
operation before reporting migration cancelation success. If the operation
of the asynchronous API did not end within the specified timeout, the
``migration cancel`` cancel operation will be considered unsuccessful.
To do so, a new configuration option ``netapp_migration_cancel_timeout``
has been added.
upgrade:
- |
The configuration option ``netapp_migration_cancel_timeout`` can be
specified in the NetApp backend section to redefine the amount of time
that the NetApp driver must attempt to wait on the asynchronous
operation to cancel an ongoing migration. This option is set to 3600
seconds by default, which is sufficient time in most cases.