Fix FC multipath rescan

Fibre Channel multipath rescan uses wildcards for the host rescan, which
can end up recreating devices that had just been removed if there's a
race condition between the removal of a SCSI device and the connection
of a volume.

The race condition happens when a rescan triggered by an attach runs right
between us removing the path and removing the LUN: the rescan will add
not only the new path we are attaching, but also the old path we are
removing, since the LUN still hasn't been removed.

This would leave orphaned devices that pollute our environment and will
be recognized as down paths when the storage controller reuses the same
WWID.

This patch narrows the rescan to only rescan for the specific lun
number, and if possible it also filters the rescan by HBA channel and
SCSI target ID.

We only filter by HBA channel and SCSI target ID when we can find this
information, and that is when the FC storage servers implement a single
WWNN for all ports.

Change-Id: Id6ed98d3fb8b4b980de86256dec8eeda84562c98
Closes-Bug: #1608614
This commit is contained in:
Gorka Eguileor 2016-07-27 14:06:06 +02:00
parent c5e3d8affb
commit 28a4d55a0a
3 changed files with 129 additions and 11 deletions

View File

@ -160,7 +160,8 @@ class FibreChannelConnector(base.BaseLinuxConnector):
"Will rescan & retry. Try number: %(tries)s."),
{'tries': tries})
self._linuxfc.rescan_hosts(hbas)
self._linuxfc.rescan_hosts(hbas,
connection_properties['target_lun'])
self.tries = self.tries + 1
self.host_device = None

View File

@ -20,17 +20,55 @@ import os
from oslo_concurrency import processutils as putils
from oslo_log import log as logging
from os_brick.i18n import _LW
from os_brick.i18n import _LE, _LW
from os_brick.initiator import linuxscsi
LOG = logging.getLogger(__name__)
class LinuxFibreChannel(linuxscsi.LinuxSCSI):
def rescan_hosts(self, hbas):
def _get_hba_channel_scsi_target(self, hba):
    """Try to get the HBA channel and SCSI target IDs for an HBA.

    Greps the ``node_name`` files under
    ``/sys/class/fc_transport/target<host number>:*`` for the HBA's WWNN
    and extracts the channel and target from every matching path.

    This method only works for Fibre Channel targets that implement a
    single WWNN for all ports, so callers must be prepared to receive an
    empty list or None and fall back to wildcards.

    :param hba: dict with at least the 'host_device' (ie: host6) and
                'node_name' keys.
    :returns: list of ``[channel, target]`` string pairs (possibly empty),
              or None if the lookup or the parsing of its output failed.
    """
    # Leave only the number from the host_device field (ie: host6)
    host_device = hba['host_device']
    if host_device and len(host_device) > 4:
        host_device = host_device[4:]

    path = '/sys/class/fc_transport/target%s:' % host_device
    cmd = 'grep %(wwnn)s %(path)s*/node_name' % {'wwnn': hba['node_name'],
                                                 'path': path}
    try:
        # NOTE(review): the command depends on the '*' glob being expanded
        # and on grep prefixing each match with its file name (which grep
        # only does when given several files) -- confirm how _execute runs
        # a single command string.
        out, _err = self._execute(cmd)
        pairs = []
        for line in out.split('\n'):
            if not line.startswith(path):
                continue
            # 5th path component is "target<host>:<channel>:<target>"
            channel_target = line.split('/')[4].split(':')[1:]
            # Callers unpack each entry into exactly two names, so drop
            # anything that isn't a well formed channel/target pair.
            if len(channel_target) == 2:
                pairs.append(channel_target)
        return pairs
    except Exception as exc:
        # Best effort: any failure just means we can't narrow the scan.
        LOG.error(_LE('Could not get HBA channel and SCSI target ID, '
                      'reason: %s'), exc)
        return None
def rescan_hosts(self, hbas, target_lun):
    """Rescan the SCSI hosts of the given HBAs for a single LUN.

    For every HBA the scan is narrowed to ``target_lun`` and, when the
    channel and target IDs can be determined, to each channel/target pair
    as well.  Otherwise wildcards are used for channel and target only.
    An unfiltered "- - -" scan is deliberately not issued.

    :param hbas: iterable of HBA dicts with 'host_device' and 'node_name'
                 keys.
    :param target_lun: LUN number to scan for.
    """
    for hba in hbas:
        # Try to get HBA channel and SCSI target to use as filters
        cts = self._get_hba_channel_scsi_target(hba)
        # If we couldn't get the channel and target use wildcards
        if not cts:
            cts = [('-', '-')]

        for hba_channel, target_id in cts:
            LOG.debug('Scanning host %(host)s (wwnn: %(wwnn)s, c: '
                      '%(channel)s, t: %(target)s, l: %(lun)s)',
                      {'host': hba['host_device'],
                       'wwnn': hba['node_name'], 'channel': hba_channel,
                       'target': target_id, 'lun': target_lun})
            # The scan file takes "<channel> <target> <lun>"
            self.echo_scsi_command(
                "/sys/class/scsi_host/%s/scan" % hba['host_device'],
                "%(c)s %(t)s %(l)s" % {'c': hba_channel,
                                       't': target_id,
                                       'l': target_lun})
def get_fc_hbas(self):
"""Get the Fibre Channel HBA information."""

View File

@ -35,12 +35,91 @@ class LinuxFCTestCase(base.TestCase):
return "", None
def test_rescan_hosts(self):
    """Scans are filtered by every channel/target pair found per HBA."""
    # We check that we try to get the HBA channel and SCSI target
    # For each HBA: one grep result, then one result per scan write.
    execute_results = (
        ('/sys/class/fc_transport/target10:2:3/node_name:'
         '0x5006016090203181\n/sys/class/fc_transport/target10:4:5/'
         'node_name:0x5006016090203181', ''),
        None,
        None,
        ('/sys/class/fc_transport/target11:6:7/node_name:'
         '0x5006016090203181\n/sys/class/fc_transport/target11:8:9/'
         'node_name:0x5006016090203181', ''),
        None,
        None)
    hbas = [{'host_device': 'host10', 'node_name': '5006016090203181'},
            {'host_device': 'host11', 'node_name': '5006016090203181'}]
    with mock.patch.object(self.lfc, '_execute',
                           side_effect=execute_results) as execute_mock:
        self.lfc.rescan_hosts(hbas, 1)
        expected_commands = [
            mock.call('grep 5006016090203181 /sys/class/fc_transport/'
                      'target10:*/node_name'),
            mock.call('tee', '-a', '/sys/class/scsi_host/host10/scan',
                      process_input='2 3 1',
                      root_helper=None, run_as_root=True),
            mock.call('tee', '-a', '/sys/class/scsi_host/host10/scan',
                      process_input='4 5 1',
                      root_helper=None, run_as_root=True),
            mock.call('grep 5006016090203181 /sys/class/fc_transport/'
                      'target11:*/node_name'),
            mock.call('tee', '-a', '/sys/class/scsi_host/host11/scan',
                      process_input='6 7 1',
                      root_helper=None, run_as_root=True),
            mock.call('tee', '-a', '/sys/class/scsi_host/host11/scan',
                      process_input='8 9 1',
                      root_helper=None, run_as_root=True)]
        execute_mock.assert_has_calls(expected_commands)
        # No extra (e.g. fully wildcarded) scans must have been issued
        self.assertEqual(len(expected_commands), execute_mock.call_count)
def test_rescan_hosts_wildcard(self):
    """Channel and target fall back to wildcards when they are unknown."""
    hbas = [{'host_device': 'host10', 'node_name': '5006016090203181'},
            {'host_device': 'host11', 'node_name': '5006016090203181'}]
    # Pretend the channel/target lookup found nothing for any HBA
    lookup_patch = mock.patch.object(self.lfc,
                                     '_get_hba_channel_scsi_target',
                                     return_value=None)
    execute_patch = mock.patch.object(self.lfc, '_execute',
                                      return_value=None)
    with lookup_patch, execute_patch as execute_mock:
        self.lfc.rescan_hosts(hbas, 1)

    common_kwargs = {'root_helper': None, 'run_as_root': True}
    expected_commands = [
        mock.call('tee', '-a', '/sys/class/scsi_host/host10/scan',
                  process_input='- - 1', **common_kwargs),
        mock.call('tee', '-a', '/sys/class/scsi_host/host11/scan',
                  process_input='- - 1', **common_kwargs),
    ]
    execute_mock.assert_has_calls(expected_commands)
    self.assertEqual(len(expected_commands), execute_mock.call_count)
def test_rescan_hosts_wildcard_exception(self):
    """Wildcards are used when the channel/target lookup command fails."""

    def fail_on_grep(cmd, *args, **kwargs):
        # Only the lookup (grep) blows up; scan writes succeed
        if cmd.startswith('grep'):
            raise Exception

    hbas = [{'host_device': 'host10', 'node_name': '5006016090203181'},
            {'host_device': 'host11', 'node_name': '5006016090203181'}]
    execute_patch = mock.patch.object(self.lfc, '_execute',
                                      side_effect=fail_on_grep)
    with execute_patch as execute_mock:
        self.lfc.rescan_hosts(hbas, 1)

    common_kwargs = {'root_helper': None, 'run_as_root': True}
    expected_commands = [
        mock.call('grep 5006016090203181 /sys/class/fc_transport/'
                  'target10:*/node_name'),
        mock.call('tee', '-a', '/sys/class/scsi_host/host10/scan',
                  process_input='- - 1', **common_kwargs),
        mock.call('grep 5006016090203181 /sys/class/fc_transport/'
                  'target11:*/node_name'),
        mock.call('tee', '-a', '/sys/class/scsi_host/host11/scan',
                  process_input='- - 1', **common_kwargs),
    ]
    execute_mock.assert_has_calls(expected_commands)
    self.assertEqual(len(expected_commands), execute_mock.call_count)
def test_get_fc_hbas_fail(self):
def fake_exec1(a, b, c, d, run_as_root=True, root_helper='sudo'):