Current code doesn't disconnect NVMe-oF subsystems when doing a disconnect_volume or on connect_volume failure. This is very problematic for systems that don't share subsytems for multiple namespaces, because both the device (i.e., /dev/nvme0n1) and the subsystem (i.e., /dev/nvme0) will stay forever (now that we connect with the controller loss timeout set to infinite, before it was for 10 minutes) in the system (until manually removed) while the host keeps trying to connect to the remote subsystem, but it won't be able to connect because in this case drivers usually destroy both the namespace and the subsystem simultaneously (so there's no AER message to indicate the change in available namespaces within the subsystem). We'll experience multiple issues with all these leftover devices, such as an ever increasing number of kernel log messages with the connection retries, possible exhaustion of number of connected NVMe subsystems and/or files in /dev, and so on. This patch makes sure the nvmeof connector disconnects a subsystem when there is no longer a namespace present or when the only namespace present is the one we are disconnecting. This is done both on the disconnect_volume call as well as failures during connect_volume. This is not a full solution to the problem of leaving leftover devices, because for drivers that share the subsystem there are race conditions between unexport/unmap of volumes on the cinder side and os-brick disconnect_volume calls. To fully prevent this situation Cinder needs to start reporting the shared_targets value for NVMe volumes (something it's already doing for iSCSI). Partial-Bug: #1961102 Change-Id: Ia00be53420307d6ac1f100420d039da7b65dc349
238 lines
7.3 KiB
Python
238 lines
7.3 KiB
Python
# (c) Copyright 2013 Hewlett-Packard Development Company, L.P.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
"""Exceptions for the Brick library."""
|
|
|
|
import traceback
|
|
from typing import Any, List, Optional # noqa: H301
|
|
|
|
from oslo_concurrency import processutils as putils
|
|
from oslo_log import log as logging
|
|
|
|
from os_brick.i18n import _
|
|
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
class BrickException(Exception):
|
|
"""Base Brick Exception
|
|
|
|
To correctly use this class, inherit from it and define
|
|
a 'message' property. That message will get printf'd
|
|
with the keyword arguments provided to the constructor.
|
|
"""
|
|
message = _("An unknown exception occurred.")
|
|
code = 500
|
|
headers: dict = {}
|
|
safe = False
|
|
|
|
def __init__(self, message=None, **kwargs):
|
|
self.kwargs = kwargs
|
|
|
|
if 'code' not in self.kwargs:
|
|
try:
|
|
self.kwargs['code'] = self.code
|
|
except AttributeError:
|
|
pass
|
|
|
|
if not message:
|
|
try:
|
|
message = self.message % kwargs
|
|
|
|
except Exception:
|
|
# kwargs doesn't match a variable in the message
|
|
# log the issue and the kwargs
|
|
LOG.exception("Exception in string format operation. "
|
|
"msg='%s'", self.message)
|
|
for name, value in kwargs.items():
|
|
LOG.error("%(name)s: %(value)s", {'name': name,
|
|
'value': value})
|
|
|
|
# at least get the core message out if something happened
|
|
message = self.message
|
|
|
|
# Put the message in 'msg' so that we can access it. If we have it in
|
|
# message it will be overshadowed by the class' message attribute
|
|
self.msg = message
|
|
super(BrickException, self).__init__(message)
|
|
|
|
|
|
class NotFound(BrickException):
|
|
message = _("Resource could not be found.")
|
|
code = 404
|
|
safe = True
|
|
|
|
|
|
class Invalid(BrickException):
|
|
message = _("Unacceptable parameters.")
|
|
code = 400
|
|
|
|
|
|
# Cannot be templated as the error syntax varies.
|
|
# msg needs to be constructed when raised.
|
|
class InvalidParameterValue(Invalid):
|
|
message = _("%(err)s")
|
|
|
|
|
|
class NoFibreChannelHostsFound(BrickException):
|
|
message = _("We are unable to locate any Fibre Channel devices.")
|
|
|
|
|
|
class NoFibreChannelVolumeDeviceFound(BrickException):
|
|
message = _("Unable to find a Fibre Channel volume device.")
|
|
|
|
|
|
class VolumeNotDeactivated(BrickException):
|
|
message = _('Volume %(name)s was not deactivated in time.')
|
|
|
|
|
|
class VolumeDeviceNotFound(BrickException):
|
|
message = _("Volume device not found at %(device)s.")
|
|
|
|
|
|
class VolumePathsNotFound(BrickException):
|
|
message = _("Could not find any paths for the volume.")
|
|
|
|
|
|
class VolumePathNotRemoved(BrickException):
|
|
message = _("Volume path %(volume_path)s was not removed in time.")
|
|
|
|
|
|
class ProtocolNotSupported(BrickException):
|
|
message = _("Connect to volume via protocol %(protocol)s not supported.")
|
|
|
|
|
|
class TargetPortalNotFound(BrickException):
|
|
message = _("Unable to find target portal %(target_portal)s.")
|
|
|
|
|
|
class TargetPortalsNotFound(TargetPortalNotFound):
|
|
message = _("Unable to find target portal in %(target_portals)s.")
|
|
|
|
|
|
class FailedISCSITargetPortalLogin(BrickException):
|
|
message = _("Unable to login to iSCSI Target Portal")
|
|
|
|
|
|
class BlockDeviceReadOnly(BrickException):
|
|
message = _("Block device %(device)s is Read-Only.")
|
|
|
|
|
|
class VolumeGroupNotFound(BrickException):
|
|
message = _("Unable to find Volume Group: %(vg_name)s")
|
|
|
|
|
|
class VolumeGroupCreationFailed(BrickException):
|
|
message = _("Failed to create Volume Group: %(vg_name)s")
|
|
|
|
|
|
class CommandExecutionFailed(BrickException):
|
|
message = _("Failed to execute command %(cmd)s")
|
|
|
|
|
|
class VolumeDriverException(BrickException):
|
|
message = _('An error occurred while IO to volume %(name)s.')
|
|
|
|
|
|
class InvalidIOHandleObject(BrickException):
|
|
message = _('IO handle of %(protocol)s has wrong object '
|
|
'type %(actual_type)s.')
|
|
|
|
|
|
class VolumeEncryptionNotSupported(Invalid):
|
|
message = _("Volume encryption is not supported for %(volume_type)s "
|
|
"volume %(volume_id)s.")
|
|
|
|
|
|
class VolumeLocalCacheNotSupported(Invalid):
|
|
message = _("Volume local cache is not supported for %(volume_type)s "
|
|
"volume %(volume_id)s.")
|
|
|
|
|
|
# NOTE(mriedem): This extends ValueError to maintain backward compatibility.
|
|
class InvalidConnectorProtocol(ValueError):
|
|
pass
|
|
|
|
|
|
class ExceptionChainer(BrickException):
|
|
"""A Exception that can contain a group of exceptions.
|
|
|
|
This exception serves as a container for exceptions, useful when we want to
|
|
store all exceptions that happened during a series of steps and then raise
|
|
them all together as one.
|
|
|
|
The representation of the exception will include all exceptions and their
|
|
tracebacks.
|
|
|
|
This class also includes a context manager for convenience, one that will
|
|
support both swallowing the exception as if nothing had happened and
|
|
raising the exception. In both cases the exception will be stored.
|
|
|
|
If a message is provided to the context manager it will be formatted and
|
|
logged with warning level.
|
|
"""
|
|
def __init__(self, *args, **kwargs):
|
|
self._exceptions: List[tuple] = []
|
|
self._repr: Optional[str] = None
|
|
self._exc_msg_args = []
|
|
super(ExceptionChainer, self).__init__(*args, **kwargs)
|
|
|
|
def __repr__(self):
|
|
# Since generating the representation can be slow we cache it
|
|
if not self._repr:
|
|
tracebacks = (
|
|
''.join(traceback.format_exception(*e)).replace('\n', '\n\t')
|
|
for e in self._exceptions)
|
|
self._repr = '\n'.join('\nChained Exception #%s\n\t%s' % (i + 1, t)
|
|
for i, t in enumerate(tracebacks))
|
|
return self._repr
|
|
|
|
__str__ = __repr__
|
|
|
|
def __nonzero__(self) -> bool:
|
|
# We want to be able to do boolean checks on the exception
|
|
return bool(self._exceptions)
|
|
|
|
__bool__ = __nonzero__ # For Python 3
|
|
|
|
def add_exception(self, exc_type, exc_val, exc_tb) -> None:
|
|
# Clear the representation cache
|
|
self._repr = None
|
|
self._exceptions.append((exc_type, exc_val, exc_tb))
|
|
|
|
def context(self,
|
|
catch_exception: bool,
|
|
msg: str = '',
|
|
*msg_args: Any):
|
|
self._catch_exception = catch_exception
|
|
self._exc_msg = msg
|
|
self._exc_msg_args = list(msg_args)
|
|
return self
|
|
|
|
def __enter__(self):
|
|
return self
|
|
|
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
if exc_type:
|
|
self.add_exception(exc_type, exc_val, exc_tb)
|
|
if self._exc_msg:
|
|
LOG.warning(self._exc_msg, *self._exc_msg_args)
|
|
if self._catch_exception:
|
|
return True
|
|
|
|
|
|
class ExecutionTimeout(putils.ProcessExecutionError):
|
|
pass
|