Browse Source

Fix FC multipath rescan

Fibre Channel multipath rescan uses wildcards for the host rescan, which
can end up recreating devices that had just been removed if there's a
race condition between the removal of a SCSI device and the connection
of a volume.

The race condition happens if a rescan triggered by an attach runs right
between us removing the path and removing the lun, because the rescan
will add not only the new path we are attaching, but also the old path we
are removing, since the lun still hasn't been removed.

This would leave orphaned devices that pollute our environment and will
be recognized as down paths when the storage controller reuses the same
WWID.

This patch narrows the rescan to only rescan for the specific lun
number, and if possible it also filters the rescan by HBA channel and
SCSI target ID.

We only filter by HBA channel and SCSI target ID when we can find this
information, and that is when the FC storage servers implement a single
WWNN for all ports.

Change-Id: Id6ed98d3fb8b4b980de86256dec8eeda84562c98
Closes-Bug: #1608614
tags/1.6.0
Gorka Eguileor 4 years ago
parent
commit
28a4d55a0a
3 changed files with 129 additions and 11 deletions
  1. +2
    -1
      os_brick/initiator/connectors/fibre_channel.py
  2. +42
    -4
      os_brick/initiator/linuxfc.py
  3. +85
    -6
      os_brick/tests/initiator/test_linuxfc.py

+ 2
- 1
os_brick/initiator/connectors/fibre_channel.py View File

@@ -160,7 +160,8 @@ class FibreChannelConnector(base.BaseLinuxConnector):
"Will rescan & retry. Try number: %(tries)s."),
{'tries': tries})

self._linuxfc.rescan_hosts(hbas)
self._linuxfc.rescan_hosts(hbas,
connection_properties['target_lun'])
self.tries = self.tries + 1

self.host_device = None


+ 42
- 4
os_brick/initiator/linuxfc.py View File

@@ -20,17 +20,55 @@ import os
from oslo_concurrency import processutils as putils
from oslo_log import log as logging

from os_brick.i18n import _LW
from os_brick.i18n import _LE, _LW
from os_brick.initiator import linuxscsi

LOG = logging.getLogger(__name__)


class LinuxFibreChannel(linuxscsi.LinuxSCSI):
def rescan_hosts(self, hbas):
def _get_hba_channel_scsi_target(self, hba):
    """Try to get the HBA channel and SCSI target for an HBA.

    Greps the ``node_name`` files under
    ``/sys/class/fc_transport/target<host>:*`` for the HBA's WWNN and
    extracts the channel and SCSI target ID from the name of each matching
    ``target<host>:<channel>:<target>`` directory.

    This method only works for Fibre Channel targets that implement a
    single WWNN for all ports, so callers must be ready for a result that
    carries no usable filter (None or an empty list).

    :param hba: dict with at least the 'host_device' (ie: host6) and
                'node_name' keys.
    :returns: list of ``[channel, target]`` string pairs (possibly empty),
              or None if the lookup failed.
    """
    # Leave only the number from the host_device field (ie: host6)
    host_device = hba['host_device']
    if host_device and len(host_device) > 4:
        host_device = host_device[4:]

    path = '/sys/class/fc_transport/target%s:' % host_device
    cmd = 'grep %(wwnn)s %(path)s*/node_name' % {'wwnn': hba['node_name'],
                                                 'path': path}
    try:
        # NOTE(review): the command relies on the '*' glob being expanded;
        # confirm that self._execute runs it through a shell, otherwise
        # grep never sees the expanded paths and we always end up in the
        # error branch below.
        out, _err = self._execute(cmd)
        ctls = []
        for line in out.split('\n'):
            if not line.startswith(path):
                continue
            # Element 4 of the split path is 'target<host>:<c>:<t>'
            channel_target = line.split('/')[4].split(':')[1:]
            # Callers unpack each entry into exactly two names, so skip
            # anything that is not a channel/target pair instead of
            # letting it blow up later outside of this error handling.
            if len(channel_target) == 2:
                ctls.append(channel_target)
        return ctls
    except Exception as exc:
        # Best effort: callers fall back to wildcards when we return None
        LOG.error(_LE('Could not get HBA channel and SCSI target ID, '
                      'reason: %s'), exc)
        return None

def rescan_hosts(self, hbas, target_lun):
    """Rescan the SCSI host of each HBA looking only for one LUN.

    The scan is narrowed to ``target_lun`` and, when the HBA channel and
    SCSI target ID can be determined, to those as well. No unconditional
    "- - -" wildcard scan is issued, since that would also rediscover
    paths that are in the middle of being removed.

    :param hbas: iterable of HBA dicts with 'host_device' and 'node_name'
                 keys.
    :param target_lun: LUN number to scan for.
    """
    for hba in hbas:
        scan_path = "/sys/class/scsi_host/%s/scan" % hba['host_device']
        # Try to get HBA channel and SCSI target to use as filters
        cts = self._get_hba_channel_scsi_target(hba)
        # If we couldn't get the channel and target use wildcards
        if not cts:
            cts = [('-', '-')]
        for hba_channel, target_id in cts:
            LOG.debug('Scanning host %(host)s (wwnn: %(wwnn)s, c: '
                      '%(channel)s, t: %(target)s, l: %(lun)s)',
                      {'host': hba['host_device'],
                       'wwnn': hba['node_name'], 'channel': hba_channel,
                       'target': target_id, 'lun': target_lun})
            self.echo_scsi_command(
                scan_path,
                "%(c)s %(t)s %(l)s" % {'c': hba_channel,
                                       't': target_id,
                                       'l': target_lun})

def get_fc_hbas(self):
"""Get the Fibre Channel HBA information."""


+ 85
- 6
os_brick/tests/initiator/test_linuxfc.py View File

@@ -35,12 +35,91 @@ class LinuxFCTestCase(base.TestCase):
return "", None

def test_rescan_hosts(self):
    """rescan_hosts scans only the channel/target pairs it discovers.

    For every HBA we expect one grep looking for its channel/target pairs
    followed by one narrowed scan per pair found, and no other command.
    """
    # We check that we try to get the HBA channel and SCSI target
    execute_results = (
        ('/sys/class/fc_transport/target10:2:3/node_name:'
         '0x5006016090203181\n/sys/class/fc_transport/target10:4:5/'
         'node_name:0x5006016090203181', ''),
        None,
        None,
        ('/sys/class/fc_transport/target11:6:7/node_name:'
         '0x5006016090203181\n/sys/class/fc_transport/target11:8:9/'
         'node_name:0x5006016090203181', ''),
        None,
        None)
    hbas = [{'host_device': 'host10', 'node_name': '5006016090203181'},
            {'host_device': 'host11', 'node_name': '5006016090203181'}]
    with mock.patch.object(self.lfc, '_execute',
                           side_effect=execute_results) as execute_mock:
        self.lfc.rescan_hosts(hbas, 1)
        expected_commands = [
            mock.call('grep 5006016090203181 /sys/class/fc_transport/'
                      'target10:*/node_name'),
            mock.call('tee', '-a', '/sys/class/scsi_host/host10/scan',
                      process_input='2 3 1',
                      root_helper=None, run_as_root=True),
            mock.call('tee', '-a', '/sys/class/scsi_host/host10/scan',
                      process_input='4 5 1',
                      root_helper=None, run_as_root=True),
            mock.call('grep 5006016090203181 /sys/class/fc_transport/'
                      'target11:*/node_name'),
            mock.call('tee', '-a', '/sys/class/scsi_host/host11/scan',
                      process_input='6 7 1',
                      root_helper=None, run_as_root=True),
            mock.call('tee', '-a', '/sys/class/scsi_host/host11/scan',
                      process_input='8 9 1',
                      root_helper=None, run_as_root=True)]

        execute_mock.assert_has_calls(expected_commands)
        # Make sure nothing besides the expected commands was executed
        self.assertEqual(len(expected_commands), execute_mock.call_count)

def test_rescan_hosts_wildcard(self):
    """Wildcards are used when no channel/target pair can be found."""
    def scan_call(scan_file):
        # Expected wildcard scan (only the LUN is fixed) for one host
        return mock.call('tee', '-a', scan_file,
                         process_input='- - 1',
                         root_helper=None, run_as_root=True)

    wwnn = '5006016090203181'
    hbas = [{'host_device': 'host10', 'node_name': wwnn},
            {'host_device': 'host11', 'node_name': wwnn}]

    # The lookup reports that it has no filter information at all
    lookup_patch = mock.patch.object(self.lfc,
                                     '_get_hba_channel_scsi_target',
                                     return_value=None)
    execute_patch = mock.patch.object(self.lfc, '_execute',
                                      return_value=None)
    with lookup_patch, execute_patch as execute_mock:
        self.lfc.rescan_hosts(hbas, 1)

        expected_commands = [
            scan_call('/sys/class/scsi_host/host10/scan'),
            scan_call('/sys/class/scsi_host/host11/scan')]

        execute_mock.assert_has_calls(expected_commands)
        # One scan per host and nothing else
        self.assertEqual(len(expected_commands), execute_mock.call_count)

def test_rescan_hosts_wildcard_exception(self):
    """Wildcards are used when the channel/target lookup command fails."""
    def fail_on_grep(cmd, *args, **kwargs):
        # Only the lookup command blows up, anything else returns None
        if cmd.startswith('grep'):
            raise Exception

    wwnn = '5006016090203181'
    hbas = [{'host_device': 'host10', 'node_name': wwnn},
            {'host_device': 'host11', 'node_name': wwnn}]
    # Keyword arguments shared by every expected wildcard scan
    scan_kwargs = dict(process_input='- - 1',
                       root_helper=None, run_as_root=True)

    with mock.patch.object(self.lfc, '_execute',
                           side_effect=fail_on_grep) as execute_mock:
        self.lfc.rescan_hosts(hbas, 1)

        expected_commands = [
            mock.call('grep 5006016090203181 /sys/class/fc_transport/'
                      'target10:*/node_name'),
            mock.call('tee', '-a', '/sys/class/scsi_host/host10/scan',
                      **scan_kwargs),
            mock.call('grep 5006016090203181 /sys/class/fc_transport/'
                      'target11:*/node_name'),
            mock.call('tee', '-a', '/sys/class/scsi_host/host11/scan',
                      **scan_kwargs)]

        execute_mock.assert_has_calls(expected_commands)
        # A failed lookup followed by a wildcard scan for each host
        self.assertEqual(len(expected_commands), execute_mock.call_count)

def test_get_fc_hbas_fail(self):
def fake_exec1(a, b, c, d, run_as_root=True, root_helper='sudo'):


Loading…
Cancel
Save