Add lock per share for cinder nfs mount/umount

With cinder configured as glance store and nfs as cinder backend,
if we create multiple images concurrently into the same share,
there might be race conditions.
This patch ensures a lock per export to avoid the race conditions.

This patch also introduces a reference counting mechamism for unmounting
which will unmount the share if no active thread/process is using it.

Closes-Bug: #1870289

Change-Id: I9197f64e29a0ae2e0a58186f1a70aa134f7f1db6
This commit is contained in:
whoami-rajat 2020-04-01 14:18:44 +00:00 committed by Rajat Dhasmana
parent 2aa892d4df
commit f5d4699613
9 changed files with 587 additions and 9 deletions

View File

@ -88,3 +88,11 @@ latex_documents = [
'%s Documentation' % project, '%s Documentation' % project,
'OpenStack Foundation', 'manual'), 'OpenStack Foundation', 'manual'),
] ]
# The autodoc module imports every module to check for import
# errors. Since the fs_mount module is self initializing, it
# requires configurations that aren't loaded till that time.
# It would never happen in a real scenario as it is only imported
# from cinder store after the config are loaded but to handle doc
# failures, we mock it here.
autodoc_mock_imports = ['glance_store.common.fs_mount']

View File

@ -10,3 +10,7 @@ disk_chown: RegExpFilter, chown, root, chown, \d+, /dev/(?!.*/\.\.).*
# This line ties the superuser privs with the config files, context name, # This line ties the superuser privs with the config files, context name,
# and (implicitly) the actual python code invoked. # and (implicitly) the actual python code invoked.
privsep-rootwrap: RegExpFilter, privsep-helper, root, privsep-helper, --config-file, /etc/(?!\.\.).*, --privsep_context, os_brick.privileged.default, --privsep_sock_path, /tmp/.* privsep-rootwrap: RegExpFilter, privsep-helper, root, privsep-helper, --config-file, /etc/(?!\.\.).*, --privsep_context, os_brick.privileged.default, --privsep_sock_path, /tmp/.*
chown: CommandFilter, chown, root
mount: CommandFilter, mount, root
umount: CommandFilter, umount, root

View File

@ -15,6 +15,7 @@
import contextlib import contextlib
import errno import errno
import hashlib import hashlib
import importlib
import logging import logging
import math import math
import os import os
@ -382,6 +383,9 @@ class Store(glance_store.driver.Store):
super(Store, self).__init__(*args, **kargs) super(Store, self).__init__(*args, **kargs)
if self.backend_group: if self.backend_group:
self._set_url_prefix() self._set_url_prefix()
# We are importing it here to let the config options load
# before we use them in the fs_mount file
self.mount = importlib.import_module('glance_store.common.fs_mount')
def get_root_helper(self): def get_root_helper(self):
if self.backend_group: if self.backend_group:
@ -525,6 +529,22 @@ class Store(glance_store.driver.Store):
raise exceptions.BackendException(msg) raise exceptions.BackendException(msg)
return volume return volume
def get_hash_str(self, base_str):
"""Returns string that represents SHA256 hash of base_str (in hex format).
If base_str is a Unicode string, encode it to UTF-8.
"""
if isinstance(base_str, str):
base_str = base_str.encode('utf-8')
return hashlib.sha256(base_str).hexdigest()
def _get_mount_path(self, share, mount_point_base):
"""Returns the mount path prefix using the mount point base and share.
:returns: The mount path prefix.
"""
return os.path.join(mount_point_base, self.get_hash_str(share))
@contextlib.contextmanager @contextlib.contextmanager
def _open_cinder_volume(self, client, volume, mode): def _open_cinder_volume(self, client, volume, mode):
attach_mode = 'rw' if mode == 'wb' else 'ro' attach_mode = 'rw' if mode == 'wb' else 'ro'
@ -557,12 +577,25 @@ class Store(glance_store.driver.Store):
try: try:
connection_info = volume.initialize_connection(volume, properties) connection_info = volume.initialize_connection(volume, properties)
if connection_info['driver_volume_type'] == 'nfs':
connection_info['mount_point_base'] = os.path.join(
mount_point_base, 'nfs')
conn = connector.InitiatorConnector.factory( conn = connector.InitiatorConnector.factory(
connection_info['driver_volume_type'], root_helper, connection_info['driver_volume_type'], root_helper,
conn=connection_info) conn=connection_info)
if connection_info['driver_volume_type'] == 'nfs':
@utils.synchronized(connection_info['data']['export'])
def connect_volume_nfs():
data = connection_info['data']
export = data['export']
vol_name = data['name']
mountpoint = self._get_mount_path(
export,
os.path.join(mount_point_base, 'nfs'))
options = data['options']
self.mount.mount(
'nfs', export, vol_name, mountpoint, host,
root_helper, options)
return {'path': os.path.join(mountpoint, vol_name)}
device = connect_volume_nfs()
else:
device = conn.connect_volume(connection_info['data']) device = conn.connect_volume(connection_info['data'])
volume.attach(None, 'glance_store', attach_mode, host_name=host) volume.attach(None, 'glance_store', attach_mode, host_name=host)
volume = self._wait_volume_status(volume, 'attaching', 'in-use') volume = self._wait_volume_status(volume, 'attaching', 'in-use')
@ -571,8 +604,7 @@ class Store(glance_store.driver.Store):
yield device['path'] yield device['path']
else: else:
with self.temporary_chown( with self.temporary_chown(
device['path'], backend=self.backend_group device['path']), open(device['path'], mode) as f:
), open(device['path'], mode) as f:
yield f yield f
except Exception: except Exception:
LOG.exception(_LE('Exception while accessing to cinder volume ' LOG.exception(_LE('Exception while accessing to cinder volume '
@ -586,6 +618,14 @@ class Store(glance_store.driver.Store):
if device: if device:
try: try:
if connection_info['driver_volume_type'] == 'nfs':
@utils.synchronized(connection_info['data']['export'])
def disconnect_volume_nfs():
path, vol_name = device['path'].rsplit('/', 1)
self.mount.umount(vol_name, path, host,
root_helper)
disconnect_volume_nfs()
else:
conn.disconnect_volume(connection_info['data'], device) conn.disconnect_volume(connection_info['data'], device)
except Exception: except Exception:
LOG.exception(_LE('Failed to disconnect volume ' LOG.exception(_LE('Failed to disconnect volume '

View File

@ -0,0 +1,366 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections
import contextlib
import logging
import os
import socket
import threading
from oslo_concurrency import processutils
from oslo_config import cfg
from glance_store import exceptions
from glance_store.i18n import _LE, _LW
LOG = logging.getLogger(__name__)
HOST = socket.gethostname()
CONF = cfg.CONF
class HostMountStateManagerMeta(type):
_instance = {}
def __call__(cls, *args, **kwargs):
if cls not in cls._instance:
cls._instance[cls] = super(
HostMountStateManagerMeta, cls).__call__(*args, **kwargs)
return cls._instance[cls]
class _HostMountStateManager(metaclass=HostMountStateManagerMeta):
"""A global manager of filesystem mounts.
_HostMountStateManager manages a _HostMountState object for the current
glance node. Primarily it creates one on object initialization and returns
it via get_state().
_HostMountStateManager manages concurrency itself. Independent callers do
not need to consider interactions between multiple _HostMountStateManager
calls when designing their own locking.
"""
# Reset state of global _HostMountStateManager
state = None
use_count = 0
# Guards both state and use_count
cond = threading.Condition()
def __init__(self, host):
"""Initialise a new _HostMountState
We will block before creating a new state until all operations
using a previous state have completed.
:param host: host
"""
# Wait until all operations using a previous state are
# complete before initialising a new one. Note that self.state is
# already None, set either by initialisation or by host_down. This
# means the current state will not be returned to any new callers,
# and use_count will eventually reach zero.
# We do this to avoid a race between _HostMountState initialisation
# and an on-going mount/unmount operation
self.host = host
while self.use_count != 0:
self.cond.wait()
# Another thread might have initialised state while we were
# waiting
if self.state is None:
LOG.debug('Initialising _HostMountState')
self.state = _HostMountState()
backends = []
enabled_backends = CONF.enabled_backends
if enabled_backends:
for backend in enabled_backends:
if enabled_backends[backend] == 'cinder':
backends.append(backend)
else:
backends.append('glance_store')
for backend in backends:
mountpoint = getattr(CONF, backend).cinder_mount_point_base
# This is currently designed for cinder nfs backend only.
# Later can be modified to work with other *fs backends.
mountpoint = os.path.join(mountpoint, 'nfs')
# There will probably be the same rootwrap file for all stores,
# generalizing this will be done in a later refactoring
rootwrap = getattr(CONF, backend).rootwrap_config
rootwrap = ('sudo glance-rootwrap %s' % rootwrap)
dirs = []
# fetch the directories in the mountpoint path
if os.path.isdir(mountpoint):
dirs = os.listdir(mountpoint)
else:
continue
if not dirs:
return
for dir in dirs:
# for every directory in the mountpath, we
# unmount it (if mounted) and remove it
dir = os.path.join(mountpoint, dir)
with self.get_state() as mount_state:
if os.path.exists(dir) and not os.path.ismount(dir):
try:
os.rmdir(dir)
except Exception as ex:
LOG.debug(
"Couldn't remove directory"
"%(mountpoint)s: %(reason)s",
{'mountpoint': mountpoint,
'reason': ex})
else:
mount_state.umount(None, dir, HOST, rootwrap)
@contextlib.contextmanager
def get_state(self):
"""Return the current mount state.
_HostMountStateManager will not permit a new state object to be
created while any previous state object is still in use.
:rtype: _HostMountState
"""
# We hold the instance lock here so that if a _HostMountState is
# currently initialising we'll wait for it to complete rather than
# fail.
with self.cond:
state = self.state
if state is None:
LOG.error('Host not initialized')
raise exceptions.HostNotInitialized(host=self.host)
self.use_count += 1
try:
LOG.debug('Got _HostMountState')
yield state
finally:
with self.cond:
self.use_count -= 1
self.cond.notify_all()
class _HostMountState(object):
"""A data structure recording all managed mountpoints and the
attachments in use for each one. _HostMountState ensures that the glance
node only attempts to mount a single mountpoint in use by multiple
attachments once, and that it is not unmounted until it is no longer in use
by any attachments.
Callers should not create a _HostMountState directly, but should obtain
it via:
with mount.get_manager().get_state() as state:
state.mount(...)
_HostMountState manages concurrency itself. Independent callers do not need
to consider interactions between multiple _HostMountState calls when
designing their own locking.
"""
class _MountPoint(object):
"""A single mountpoint, and the set of attachments in use on it."""
def __init__(self):
# A guard for operations on this mountpoint
# N.B. Care is required using this lock, as it will be deleted
# if the containing _MountPoint is deleted.
self.lock = threading.Lock()
# The set of attachments on this mountpoint.
self.attachments = set()
def add_attachment(self, vol_name, host):
self.attachments.add((vol_name, host))
def remove_attachment(self, vol_name, host):
self.attachments.remove((vol_name, host))
def in_use(self):
return len(self.attachments) > 0
def __init__(self):
"""Initialise _HostMountState"""
self.mountpoints = collections.defaultdict(self._MountPoint)
@contextlib.contextmanager
def _get_locked(self, mountpoint):
"""Get a locked mountpoint object
:param mountpoint: The path of the mountpoint whose object we should
return.
:rtype: _HostMountState._MountPoint
"""
while True:
mount = self.mountpoints[mountpoint]
with mount.lock:
if self.mountpoints[mountpoint] is mount:
yield mount
break
def mount(self, fstype, export, vol_name, mountpoint, host,
rootwrap_helper, options):
"""Ensure a mountpoint is available for an attachment, mounting it
if necessary.
If this is the first attachment on this mountpoint, we will mount it
with:
mount -t <fstype> <options> <export> <mountpoint>
:param fstype: The filesystem type to be passed to mount command.
:param export: The type-specific identifier of the filesystem to be
mounted. e.g. for nfs 'host.example.com:/mountpoint'.
:param vol_name: The name of the volume on the remote filesystem.
:param mountpoint: The directory where the filesystem will be
mounted on the local compute host.
:param host: The host the volume will be attached to.
:param options: An arbitrary list of additional arguments to be
passed to the mount command immediate before export
and mountpoint.
"""
LOG.debug('_HostMountState.mount(fstype=%(fstype)s, '
'export=%(export)s, vol_name=%(vol_name)s, %(mountpoint)s, '
'options=%(options)s)',
{'fstype': fstype, 'export': export, 'vol_name': vol_name,
'mountpoint': mountpoint, 'options': options})
with self._get_locked(mountpoint) as mount:
if not os.path.ismount(mountpoint):
LOG.debug('Mounting %(mountpoint)s',
{'mountpoint': mountpoint})
os.makedirs(mountpoint)
mount_cmd = ['mount', '-t', fstype]
if options is not None:
mount_cmd.extend(options)
mount_cmd.extend([export, mountpoint])
try:
processutils.execute(*mount_cmd, run_as_root=True,
root_helper=rootwrap_helper)
except Exception:
# Check to see if mountpoint is mounted despite the error
# eg it was already mounted
if os.path.ismount(mountpoint):
# We're not going to raise the exception because we're
# in the desired state anyway. However, this is still
# unusual so we'll log it.
LOG.exception(_LE('Error mounting %(fstype)s export '
'%(export)s on %(mountpoint)s. '
'Continuing because mountpount is '
'mounted despite this.'),
{'fstype': fstype, 'export': export,
'mountpoint': mountpoint})
else:
# If the mount failed there's no reason for us to keep
# a record of it. It will be created again if the
# caller retries.
# Delete while holding lock
del self.mountpoints[mountpoint]
raise
mount.add_attachment(vol_name, host)
LOG.debug('_HostMountState.mount() for %(mountpoint)s '
'completed successfully',
{'mountpoint': mountpoint})
def umount(self, vol_name, mountpoint, host, rootwrap_helper):
"""Mark an attachment as no longer in use, and unmount its mountpoint
if necessary.
:param vol_name: The name of the volume on the remote filesystem.
:param mountpoint: The directory where the filesystem is be
mounted on the local compute host.
:param host: The host the volume was attached to.
"""
LOG.debug('_HostMountState.umount(vol_name=%(vol_name)s, '
'mountpoint=%(mountpoint)s)',
{'vol_name': vol_name, 'mountpoint': mountpoint})
with self._get_locked(mountpoint) as mount:
try:
mount.remove_attachment(vol_name, host)
except KeyError:
LOG.warning(_LW("Request to remove attachment "
"(%(vol_name)s, %(host)s) from "
"%(mountpoint)s, but we don't think it's in "
"use."),
{'vol_name': vol_name, 'host': host,
'mountpoint': mountpoint})
if not mount.in_use():
mounted = os.path.ismount(mountpoint)
if mounted:
mounted = self._real_umount(mountpoint, rootwrap_helper)
# Delete our record entirely if it's unmounted
if not mounted:
del self.mountpoints[mountpoint]
LOG.debug('_HostMountState.umount() for %(mountpoint)s '
'completed successfully',
{'mountpoint': mountpoint})
def _real_umount(self, mountpoint, rootwrap_helper):
# Unmount and delete a mountpoint.
# Return mount state after umount (i.e. True means still mounted)
LOG.debug('Unmounting %(mountpoint)s', {'mountpoint': mountpoint})
try:
processutils.execute('umount', mountpoint, run_as_root=True,
attempts=3, delay_on_retry=True,
root_helper=rootwrap_helper)
except processutils.ProcessExecutionError as ex:
LOG.error(_LE("Couldn't unmount %(mountpoint)s: %(reason)s"),
{'mountpoint': mountpoint, 'reason': ex})
if not os.path.ismount(mountpoint):
try:
os.rmdir(mountpoint)
except Exception as ex:
LOG.error(_LE("Couldn't remove directory %(mountpoint)s: "
"%(reason)s"),
{'mountpoint': mountpoint,
'reason': ex})
return False
return True
__manager__ = _HostMountStateManager(HOST)
def mount(fstype, export, vol_name, mountpoint, host, rootwrap_helper,
options=None):
"""A convenience wrapper around _HostMountState.mount()"""
with __manager__.get_state() as mount_state:
mount_state.mount(fstype, export, vol_name, mountpoint, host,
rootwrap_helper, options)
def umount(vol_name, mountpoint, host, rootwrap_helper):
"""A convenience wrapper around _HostMountState.umount()"""
with __manager__.get_state() as mount_state:
mount_state.umount(vol_name, mountpoint, host, rootwrap_helper)

View File

@ -21,6 +21,8 @@ System-level utilities and helper functions.
import logging import logging
import uuid import uuid
from oslo_concurrency import lockutils
try: try:
from eventlet import sleep from eventlet import sleep
except ImportError: except ImportError:
@ -31,6 +33,8 @@ from glance_store.i18n import _
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
synchronized = lockutils.synchronized_with_prefix('glance_store-')
def is_uuid_like(val): def is_uuid_like(val):
"""Returns validation of a value as a UUID. """Returns validation of a value as a UUID.

View File

@ -181,3 +181,8 @@ class HasSnapshot(GlanceStoreException):
class InUseByStore(GlanceStoreException): class InUseByStore(GlanceStoreException):
message = _("The image cannot be deleted because it is in use through " message = _("The image cannot be deleted because it is in use through "
"the backend store outside of Glance.") "the backend store outside of Glance.")
class HostNotInitialized(GlanceStoreException):
message = _("The glance cinder store host %(host)s which will used to "
"perform nfs mount/umount operations isn't initialized.")

View File

@ -0,0 +1,145 @@
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import fixtures
import mock
import sys
from oslo_concurrency import processutils
from oslo_config import cfg
from oslotest import base
from glance_store import exceptions
CONF = cfg.CONF
class HostMountManagerTestCase(base.BaseTestCase):
class FakeHostMountState:
def __init__(self):
self.mountpoints = {mock.sentinel.mountpoint}
def setUp(self):
super(HostMountManagerTestCase, self).setUp()
CONF.register_opt(cfg.DictOpt('enabled_backends'))
CONF.set_override('enabled_backends', 'fake:file')
# Since this is mocked in other tests, we unmock it here
if 'glance_store.common.fs_mount' in sys.modules:
sys.modules.pop('glance_store.common.fs_mount')
# Since the _HostMountStateManager class instantiates on its
# import, this import is done here to register the enabled_backends
# config option before it is used during initialization
from glance_store.common import fs_mount as mount # noqa
self.__manager__ = mount.__manager__
def get_state(self):
with self.__manager__.get_state() as state:
return state
def test_get_state_host_not_initialized(self):
self.__manager__.state = None
self.assertRaises(exceptions.HostNotInitialized,
self.get_state)
def test_get_state(self):
self.__manager__.state = self.FakeHostMountState()
state = self.get_state()
self.assertEqual({mock.sentinel.mountpoint}, state.mountpoints)
class HostMountStateTestCase(base.BaseTestCase):
def setUp(self):
super(HostMountStateTestCase, self).setUp()
CONF.register_opt(cfg.DictOpt('enabled_backends'))
CONF.set_override('enabled_backends', 'fake:file')
# Since this is mocked in other tests, we unmock it here
if 'glance_store.common.fs_mount' in sys.modules:
sys.modules.pop('glance_store.common.fs_mount')
# Since the _HostMountStateManager class instantiates on its
# import, this import is done here to register the enabled_backends
# config option before it is used during initialization
from glance_store.common import fs_mount as mount # noqa
self.mounted = set()
self.m = mount._HostMountState()
def fake_execute(cmd, *args, **kwargs):
if cmd == 'mount':
path = args[-1]
if path in self.mounted:
raise processutils.ProcessExecutionError('Already mounted')
self.mounted.add(path)
elif cmd == 'umount':
path = args[-1]
if path not in self.mounted:
raise processutils.ProcessExecutionError('Not mounted')
self.mounted.remove(path)
def fake_ismount(path):
return path in self.mounted
mock_execute = mock.MagicMock(side_effect=fake_execute)
self.useFixture(fixtures.MonkeyPatch(
'oslo_concurrency.processutils.execute',
mock_execute))
self.useFixture(fixtures.MonkeyPatch('os.path.ismount', fake_ismount))
@staticmethod
def _expected_sentinel_mount_calls(mountpoint=mock.sentinel.mountpoint):
return [mock.call('mount', '-t', mock.sentinel.fstype,
mock.sentinel.option1, mock.sentinel.option2,
mock.sentinel.export, mountpoint,
root_helper=mock.sentinel.rootwrap_helper,
run_as_root=True)]
@staticmethod
def _expected_sentinel_umount_calls(mountpoint=mock.sentinel.mountpoint):
return [mock.call('umount', mountpoint, attempts=3,
delay_on_retry=True,
root_helper=mock.sentinel.rootwrap_helper,
run_as_root=True)]
def _sentinel_mount(self):
self.m.mount(mock.sentinel.fstype, mock.sentinel.export,
mock.sentinel.vol, mock.sentinel.mountpoint,
mock.sentinel.host, mock.sentinel.rootwrap_helper,
[mock.sentinel.option1, mock.sentinel.option2])
def _sentinel_umount(self):
self.m.umount(mock.sentinel.vol, mock.sentinel.mountpoint,
mock.sentinel.host, mock.sentinel.rootwrap_helper)
@mock.patch('os.makedirs')
def test_mount(self, mock_makedirs):
self._sentinel_mount()
mock_makedirs.assert_called_once()
processutils.execute.assert_has_calls(
self._expected_sentinel_mount_calls())
def test_unmount_without_mount(self):
self._sentinel_umount()
processutils.execute.assert_not_called()
@mock.patch('os.rmdir')
@mock.patch('os.makedirs')
def test_umount_with_mount(self, mock_makedirs, mock_rmdir):
self._sentinel_mount()
self._sentinel_umount()
mock_makedirs.assert_called_once()
mock_rmdir.assert_called_once()
processutils.execute.assert_has_calls(
self._expected_sentinel_mount_calls() +
self._expected_sentinel_umount_calls())

View File

@ -20,6 +20,7 @@ import mock
import os import os
import six import six
import socket import socket
import sys
import tempfile import tempfile
import time import time
import uuid import uuid
@ -29,12 +30,14 @@ from os_brick.initiator import connector
from oslo_concurrency import processutils from oslo_concurrency import processutils
from oslo_utils import units from oslo_utils import units
from glance_store._drivers import cinder
from glance_store import exceptions from glance_store import exceptions
from glance_store import location from glance_store import location
from glance_store.tests import base from glance_store.tests import base
from glance_store.tests.unit import test_store_capabilities from glance_store.tests.unit import test_store_capabilities
sys.modules['glance_store.common.fs_mount'] = mock.Mock()
from glance_store._drivers import cinder # noqa
class FakeObject(object): class FakeObject(object):
def __init__(self, **kwargs): def __init__(self, **kwargs):

View File

@ -20,6 +20,7 @@ import mock
import os import os
import six import six
import socket import socket
import sys
import tempfile import tempfile
import time import time
import uuid import uuid
@ -31,12 +32,14 @@ from oslo_config import cfg
from oslo_utils import units from oslo_utils import units
import glance_store as store import glance_store as store
from glance_store._drivers import cinder
from glance_store import exceptions from glance_store import exceptions
from glance_store import location from glance_store import location
from glance_store.tests import base from glance_store.tests import base
from glance_store.tests.unit import test_store_capabilities as test_cap from glance_store.tests.unit import test_store_capabilities as test_cap
sys.modules['glance_store.common.fs_mount'] = mock.Mock()
from glance_store._drivers import cinder # noqa
class FakeObject(object): class FakeObject(object):
def __init__(self, **kwargs): def __init__(self, **kwargs):