400ca5d6db
This patch refactors the iSCSI disconnect code, changing the approach to one that just uses `iscsiadm -m session` and sysfs to get all the required information: devices from the connection, multipath system device name, multipath name, the WWN for the block devices... By doing so, not only do we fix a good number of bugs, but we also improve the reliability and speed of the mechanism. Some examples of the improvements and benefits achieved by this patch are:
- Common code for multipath and single path disconnects.
- No more querying iSCSI devices for their WWN (page 0x83), removing delays and issues on flaky connections.
- All devices are properly cleaned even if they are not part of the multipath.
- We wait for device removal and do it in parallel if there are multiple.
- Removed usage of `multipath -l` to find devices, which is really slow with flaky connections and didn't work when called with a device from a path that is down.
- Prevent losing data when detaching: currently, if the multipath flush fails for any other reason than "in use" we silently continue with the removal. That is the case when all paths are momentarily down.
- Adds a new mechanism for the caller of the disconnect to specify that it's acceptable to lose data and that it's more important to leave a clean system. That is the case if we are creating a volume from an image, since the volume will just be set to error, but we don't want leftovers. Optionally we can tell os-brick to ignore errors and not raise an exception if the flush fails.
- Add a warning when we could be leaving leftovers behind due to disconnect issues.
- Action retries (like multipath flush) will now only log the final exception instead of logging all the exceptions.
- Flushes of individual paths now use exponential backoff retries instead of random retries between 0.2 and 2 seconds (from oslo library).
- We no longer use symlinks from `/dev/disk/by-path`, `/dev/disk/by-id`, or `/dev/mapper` to find devices or multipaths, as they could be leftovers from previous runs.
- With high failure rates (above 30%) some CLI calls will enter into a weird state where they wait forever, so we add a timeout mechanism in our `execute` method and add it to those specific calls.

Closes-Bug: #1502534
Change-Id: I058ff0a0e5ad517507dc3cda39087c913558561d
194 lines
7.6 KiB
Python
194 lines
7.6 KiB
Python
# All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import os
|
|
|
|
from oslo_concurrency import lockutils
|
|
from oslo_log import log as logging
|
|
|
|
from os_brick import exception
|
|
from os_brick.i18n import _
|
|
from os_brick.initiator.connectors import base
|
|
from os_brick import utils
|
|
|
|
# Module-level logger, named after this module, used by the connector below.
LOG = logging.getLogger(__name__)
|
|
# Locking decorator factory built with the 'os-brick-' prefix; it is applied
# below as ``@synchronized('connect_volume')``.
# NOTE(review): presumably the prefix is prepended to the lock file name, as
# described in the oslo.concurrency lockutils documentation -- confirm.
synchronized = lockutils.synchronized_with_prefix('os-brick-')
|
|
|
|
|
|
class HuaweiStorHyperConnector(base.BaseLinuxConnector):
    """Connector class to attach/detach SDSHypervisor volumes.

    Volumes are attached, detached and queried by running the SDS command
    line tool found at ``self.cli_path`` and parsing the ``key=value``
    lines it prints (see :meth:`_cli_cmd` and :meth:`_analyze_output`).
    """

    # Values compared against the ``ret_code`` reported by the SDS CLI.
    # connect_volume() accepts attached_success_code,
    # has_been_attached_code and attach_mnid_done_code; disconnect_volume()
    # accepts attached_success_code, vbs_unnormal_code and
    # not_mount_node_code. Any other code is treated as a failure.
    attached_success_code = 0
    has_been_attached_code = 50151401
    attach_mnid_done_code = 50151405
    vbs_unnormal_code = 50151209
    not_mount_node_code = 50155007
    # Set to False by __init__ when no file exists at ``cli_path``; every
    # CLI call then fails fast with a BrickException.
    iscliexist = True

    def __init__(self, root_helper, driver=None,
                 *args, **kwargs):
        """Locate the SDS CLI and initialise the base connector.

        The CLI path is read from the ``HUAWEISDSHYPERVISORCLI_PATH``
        environment variable, falling back to
        ``/usr/local/bin/sds/sds_cli``. A missing CLI file is logged and
        remembered in ``self.iscliexist`` rather than raised here.

        :param root_helper: passed through to the base connector.
        :param driver: passed through to the base connector.
        """
        self.cli_path = os.getenv('HUAWEISDSHYPERVISORCLI_PATH')
        if not self.cli_path:
            self.cli_path = '/usr/local/bin/sds/sds_cli'
            LOG.debug("CLI path is not configured, using default %s.",
                      self.cli_path)
        if not os.path.isfile(self.cli_path):
            self.iscliexist = False
            LOG.error('SDS CLI file not found, '
                      'HuaweiStorHyperConnector init failed.')
        super(HuaweiStorHyperConnector, self).__init__(root_helper,
                                                       driver=driver,
                                                       *args, **kwargs)

    @staticmethod
    def get_connector_properties(root_helper, *args, **kwargs):
        """The HuaweiStor connector properties."""
        return {}

    def get_search_path(self):
        """Return the directory to search for volumes (currently None)."""
        # TODO(walter-boring): Where is the location on the filesystem to
        # look for Huawei volumes to show up?
        return None

    def get_all_available_volumes(self, connection_properties=None):
        """Return all available volumes (currently always an empty list)."""
        # TODO(walter-boring): what to return here for all Huawei volumes ?
        return []

    def get_volume_paths(self, connection_properties):
        """Return a one-element list with the attached volume's path.

        :param connection_properties: dict containing ``volume_id``.
        :returns: list with the device path reported by the CLI.
        :raises BrickException: if the volume path cannot be determined
            for any reason.
        """
        try:
            volume_path = self._get_volume_path(connection_properties)
        except Exception:
            msg = _("Couldn't find a volume.")
            LOG.warning(msg)
            raise exception.BrickException(message=msg)
        return [volume_path]

    @staticmethod
    def _parse_ret_code(out):
        """Return the integer ``ret_code`` of a parsed CLI result.

        :param out: dict produced by :meth:`_analyze_output`, or None.
        :returns: the return code as an int, or None when ``out`` is
            empty, has no ``ret_code`` entry, or the entry is not an
            integer. None never matches an accepted code, so callers
            treat it as a failure.
        """
        if not out:
            return None
        try:
            return int(out['ret_code'])
        except (KeyError, TypeError, ValueError):
            return None

    def _get_volume_path(self, connection_properties):
        """Query the CLI for the device path of an attached volume.

        :param connection_properties: dict containing ``volume_id``.
        :returns: the ``dev_addr`` value reported by the CLI.
        :raises BrickException: if the query fails, returns a non-zero
            ``ret_code``, or reports no ``dev_addr``.
        """
        out = self._query_attached_volume(
            connection_properties['volume_id'])
        # The short-circuit guarantees ``out`` is a non-empty dict by the
        # time the membership test runs.
        if self._parse_ret_code(out) != 0 or 'dev_addr' not in out:
            msg = _("Couldn't find attached volume.")
            LOG.error(msg)
            raise exception.BrickException(message=msg)
        return out['dev_addr']

    @utils.trace
    @synchronized('connect_volume')
    def connect_volume(self, connection_properties):
        """Connect to a volume.

        :param connection_properties: The dictionary that describes all
                                      of the target volume attributes.
        :type connection_properties: dict
        :returns: dict with ``type`` ('block') and ``path`` keys.
        :raises BrickException: if the attach command fails or the
            attached volume cannot be queried afterwards.
        """
        LOG.debug("Connect_volume connection properties: %s.",
                  connection_properties)
        out = self._attach_volume(connection_properties['volume_id'])
        accepted_codes = (self.attached_success_code,
                          self.has_been_attached_code,
                          self.attach_mnid_done_code)
        if self._parse_ret_code(out) not in accepted_codes:
            # ``out`` may be None or lack 'ret_code'; never index it
            # directly while building the error message.
            msg = (_("Attach volume failed, "
                     "error code is %s") % (out or {}).get('ret_code'))
            raise exception.BrickException(message=msg)

        try:
            volume_path = self._get_volume_path(connection_properties)
        except Exception:
            msg = _("query attached volume failed or volume not attached.")
            LOG.error(msg)
            raise exception.BrickException(message=msg)

        device_info = {'type': 'block',
                       'path': volume_path}
        return device_info

    @utils.trace
    @synchronized('connect_volume')
    def disconnect_volume(self, connection_properties, device_info,
                          force=False, ignore_errors=False):
        """Disconnect a volume from the local host.

        Uses the same 'connect_volume' lock name as connect_volume().

        :param connection_properties: The dictionary that describes all
                                      of the target volume attributes.
        :type connection_properties: dict
        :param device_info: historical difference, but same as connection_props
        :type device_info: dict
        :param force: accepted for interface compatibility; not used by
                      this connector.
        :param ignore_errors: accepted for interface compatibility; not
                              used by this connector.
        :raises BrickException: if the detach command fails.
        """
        LOG.debug("Disconnect_volume: %s.", connection_properties)
        out = self._detach_volume(connection_properties['volume_id'])
        accepted_codes = (self.attached_success_code,
                          self.vbs_unnormal_code,
                          self.not_mount_node_code)
        if self._parse_ret_code(out) not in accepted_codes:
            # ``out`` may be None or lack 'ret_code'; never index it
            # directly while building the error message.
            msg = (_("Disconnect_volume failed, "
                     "error code is %s") % (out or {}).get('ret_code'))
            raise exception.BrickException(message=msg)

    def is_volume_connected(self, volume_name):
        """Check if volume already connected to host.

        :returns: True only when the query reports ``ret_code`` 0.
        """
        LOG.debug('Check if volume %s already connected to a host.',
                  volume_name)
        out = self._query_attached_volume(volume_name)
        return self._parse_ret_code(out) == 0

    def _attach_volume(self, volume_name):
        """Run the CLI 'attach' method for ``volume_name``."""
        return self._cli_cmd('attach', volume_name)

    def _detach_volume(self, volume_name):
        """Run the CLI 'detach' method for ``volume_name``."""
        return self._cli_cmd('detach', volume_name)

    def _query_attached_volume(self, volume_name):
        """Run the CLI 'querydev' method for ``volume_name``."""
        return self._cli_cmd('querydev', volume_name)

    def _cli_cmd(self, method, volume_name):
        """Run ``<cli_path> -c <method> -v <volume_name>`` and parse it.

        :param method: CLI method name ('attach', 'detach', 'querydev').
        :param volume_name: volume to operate on.
        :returns: dict from :meth:`_analyze_output`, or None when the
            arguments are missing or the command printed nothing.
        :raises BrickException: if the CLI file was not found at init.
        """
        LOG.debug("Enter into _cli_cmd.")
        if not self.iscliexist:
            msg = _("SDS command line doesn't exist, "
                    "can't execute SDS command.")
            raise exception.BrickException(message=msg)
        if not method or volume_name is None:
            return None
        cmd = [self.cli_path, '-c', method, '-v', volume_name]
        out, clilog = self._execute(*cmd, run_as_root=False,
                                    root_helper=self._root_helper)
        analyse_result = self._analyze_output(out)
        LOG.debug('%(method)s volume returns %(analyse_result)s.',
                  {'method': method, 'analyse_result': analyse_result})
        # The second element returned by _execute is only logged.
        if clilog:
            LOG.error("SDS CLI output some log: %s.", clilog)
        return analyse_result

    def _analyze_output(self, out):
        """Parse ``key=value`` lines of CLI output into a dict.

        Only the ``ret_code``, ``ret_desc`` and ``dev_addr`` keys are
        kept. Surrounding whitespace (including a trailing carriage
        return) is stripped from keys and values so it cannot leak into
        the device path or hide a known key.

        :param out: raw text printed by the CLI.
        :returns: dict of recognised keys (possibly empty), or None when
            ``out`` is empty.
        """
        LOG.debug("Enter into _analyze_output.")
        if not out:
            return None

        analyse_result = {}
        for line in out.split('\n'):
            LOG.debug("Line is %s.", line)
            key, sep, val = line.partition('=')
            if not sep:
                # No '=' on this line, so nothing to parse.
                continue
            key, val = key.strip(), val.strip()
            LOG.debug("%(key)s = %(val)s", {'key': key, 'val': val})
            if key in ('ret_code', 'ret_desc', 'dev_addr'):
                analyse_result[key] = val
        return analyse_result

    def extend_volume(self, connection_properties):
        """Not supported by this connector.

        :raises NotImplementedError: always.
        """
        # TODO(walter-boring): is this possible?
        raise NotImplementedError
|