Retry unmount operation on the LVM driver

When a share is mounted on the same host as the manila-share
process, the kernel prevents us from destroying the
mount directory until the share has been cleanly unmounted
from the host. Kernel mounts can take a few seconds to get
unmounted fully especially when there are a lot of
linux namespaces that the mountpoint has been shared to.

Add a retry on these operations to harden the deletion
process and prevent spurious failures.

Change-Id: I3c1a2ec19d6bc18638db0875519ce60f2c89f33a
Closes-Bug: #1903773
Related-Bug: #1896672
Signed-off-by: Goutham Pacha Ravi <gouthampravi@gmail.com>
This commit is contained in:
Goutham Pacha Ravi 2020-11-11 23:25:32 -08:00
parent e3fea14788
commit 8a691d8631
3 changed files with 57 additions and 27 deletions

View File

@ -33,7 +33,8 @@ from manila import exception
from manila.i18n import _
from manila.share import driver
from manila.share.drivers import generic
from manila.share import utils
from manila.share import utils as share_utils
from manila import utils
LOG = log.getLogger(__name__)
@ -258,28 +259,36 @@ class LVMShareDriver(LVMMixin, driver.ShareDriver):
return location
def delete_share(self, context, share, share_server=None):
self._unmount_device(share, raise_if_missing=False)
self._unmount_device(share, raise_if_missing=False,
retry_busy_device=True)
self._delete_share(context, share)
self._deallocate_container(share['name'])
def _unmount_device(self, share_or_snapshot, raise_if_missing=True):
def _unmount_device(self, share_or_snapshot, raise_if_missing=True,
retry_busy_device=False):
"""Unmount the filesystem of a share or snapshot LV."""
mount_path = self._get_mount_path(share_or_snapshot)
if os.path.exists(mount_path):
# umount, may be busy
try:
self._execute('umount', '-f', mount_path, run_as_root=True)
except exception.ProcessExecutionError as exc:
if 'device is busy' in exc.stderr.lower():
raise exception.ShareBusyException(
reason=share_or_snapshot['name'])
elif 'not mounted' in exc.stderr.lower():
if raise_if_missing:
LOG.error('Unable to find device: %s', exc)
retries = 10 if retry_busy_device else 1
@utils.retry(exception.ShareBusyException, retries=retries)
def _unmount_device_with_retry():
try:
self._execute('umount', '-f', mount_path, run_as_root=True)
except exception.ProcessExecutionError as exc:
if 'is busy' in exc.stderr.lower():
raise exception.ShareBusyException(
reason=share_or_snapshot['name'])
elif 'not mounted' in exc.stderr.lower():
if raise_if_missing:
LOG.error('Unable to find device: %s', exc)
raise
else:
LOG.error('Unable to umount: %s', exc)
raise
else:
LOG.error('Unable to umount: %s', exc)
raise
_unmount_device_with_retry()
# remove dir
self._execute('rmdir', mount_path, run_as_root=True)
@ -416,7 +425,7 @@ class LVMShareDriver(LVMMixin, driver.ShareDriver):
share['name'],
share_access_rules,
[], [])
snapshot_access_rules, __, __ = utils.change_rules_to_readonly(
snapshot_access_rules, __, __ = share_utils.change_rules_to_readonly(
snapshot_access_rules, [], [])
self._get_helper(share).update_access(self.share_server,
snapshot['name'],
@ -478,8 +487,10 @@ class LVMShareDriver(LVMMixin, driver.ShareDriver):
:param share_server: None or Share server model
"""
helper = self._get_helper(snapshot['share'])
access_rules, add_rules, delete_rules = utils.change_rules_to_readonly(
access_rules, add_rules, delete_rules)
access_rules, add_rules, delete_rules = (
share_utils.change_rules_to_readonly(
access_rules, add_rules, delete_rules)
)
helper.update_access(self.share_server,
snapshot['name'], access_rules,
@ -515,5 +526,5 @@ class LVMShareDriver(LVMMixin, driver.ShareDriver):
def get_backend_info(self, context):
return {
'export_ips': ','.join(self.share_server['public_addresses']),
'db_version': utils.get_recent_db_migration_id(),
'db_version': share_utils.get_recent_db_migration_id(),
}

View File

@ -468,16 +468,29 @@ class LVMShareDriverTestCase(test.TestCase):
def _get_mount_path(self, share):
return os.path.join(CONF.lvm_share_export_root, share['name'])
def test__unmount_device(self):
@ddt.data(True, False)
def test__unmount_device_with_retry_busy_device(self, retry_busy_device):
execute_sideeffects = [
exception.ProcessExecutionError(stderr='device is busy'),
exception.ProcessExecutionError(stderr='target is busy'),
None, None
] if retry_busy_device else [None, None]
mount_path = self._get_mount_path(self.share)
self._os.path.exists.return_value = True
self.mock_object(self._driver, '_execute')
self._driver._unmount_device(self.share)
self._driver._execute.assert_any_call('umount', '-f', mount_path,
run_as_root=True)
self._driver._execute.assert_any_call('rmdir', mount_path,
run_as_root=True)
self.mock_object(self._driver, '_execute', mock.Mock(
side_effect=execute_sideeffects))
self._driver._unmount_device(self.share,
retry_busy_device=retry_busy_device)
num_of_times_umount_is_called = 3 if retry_busy_device else 1
self._os.path.exists.assert_called_with(mount_path)
self._driver._execute.assert_has_calls([
mock.call('umount', '-f', mount_path, run_as_root=True),
] * num_of_times_umount_is_called + [
mock.call('rmdir', mount_path, run_as_root=True)
])
def test_extend_share(self):
local_path = self._driver._get_local_path(self.share)

View File

@ -0,0 +1,6 @@
---
fixes:
- |
Share cleanup for the LVM driver has been enhanced to retry on known
errors that could occur due to mount propagation. See `bug 1903773
<https://bugs.launchpad.net/manila/+bug/1903773>`_ for more details.