From 2e46deeaa109f46686171e9af6ea12e94eb9e1f7 Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Mon, 14 Oct 2019 16:42:22 +0200 Subject: [PATCH] Fix FC scan too broad MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After some changes to the FC connector we have introduced a regression on the way we do the scans, and we end up scanning using wildcards even though we shouldn't. The targets in the "initiator_target_map" don't mean that they are all connected, so we must take that into account. With the current code, if we have the following connections: HBA host7 ---- SWITCH ---- port W (channel 0, target 2)           \--- SWITCH ---- port X (channel 0, target 3) HBA host8 ---- SWITCH ---- port Y (channel 0, target 2)           \--- SWITCH ---- port Z (channel 0, target 3) We will end up with the following scans 8 scans for LUN L:   - - L > host7   - - L > host7   0 2 L > host7   0 3 L > host7   0 2 L > host8   0 3 L > host8   - - L > host8   - - L > host8 Which correspond to the responses from _get_hba_channel_scsi_target like this:                port Y port Z port W port X   host7 ... ['-','-',L] ['-','-',L] ['0','2',L] ['0','3',L]   host8 ... ['0','2',L] ['0','3',L] ['-','-',L] ['-','-',L] And we should only be doing 4 scans:   0 2 L > host7   0 3 L > host7   0 2 L > host8   0 3 L > host8 Most storage arrays get their target ports automatically detected by the Linux FC initiator and sysfs gets populated with that information, but there are some that don't. We'll do a narrow scan using the channel, target, and LUN for the former and a wider scan for the latter. If all paths to a former type of array were down on the system boot the array could look like it's of the latter type and make us bring us unwanted volumes into the system by doing a broad scan. To prevent this from happening Cinder drivers can use the "enable_wildcard_scan" key in the connection information to let us know they don't want us to do broad scans even if no target ports are found (because they know the cause is there's no connection). Close-Bug: #1849504 Related-Bug: #1765000 Related-Bug: #1828440 Change-Id: I5dbefaff43fb902b15117b443fc92f7b6a6ad8c9 (cherry picked from commit 708733e49598145eb8e898d486723dc3df00a52f) (cherry picked from commit c76b6dc514b8e17e1211e4a7465a7e2a635adea3) (cherry picked from commit 8933503041529978b9fd816e1788b1b0c343b540) --- os_brick/initiator/linuxfc.py | 99 ++++++++------ os_brick/tests/initiator/test_linuxfc.py | 125 ++++++++++++------ ...ix-fc-scan-too-broad-3c576e1846b7f05f.yaml | 6 + 3 files changed, 149 insertions(+), 81 deletions(-) create mode 100644 releasenotes/notes/fix-fc-scan-too-broad-3c576e1846b7f05f.yaml diff --git a/os_brick/initiator/linuxfc.py b/os_brick/initiator/linuxfc.py index 65ddfff7a..783b07c95 100644 --- a/os_brick/initiator/linuxfc.py +++ b/os_brick/initiator/linuxfc.py @@ -34,23 +34,30 @@ class LinuxFibreChannel(linuxscsi.LinuxSCSI): else: return False - def _get_hba_channel_scsi_target(self, hba, conn_props): - """Try to get the HBA channel and SCSI target for an HBA. + def _get_hba_channel_scsi_target_lun(self, hba, conn_props): + """Get HBA channels, SCSI targets, LUNs to FC targets for given HBA. - This method only works for Fibre Channel targets that implement a - single WWNN for all ports, so caller should expect us to return either - explicit channel and targets or wild cards if we cannot determine them. + Given an HBA and the connection properties we look for the HBA channels + and SCSI targets for each of the FC targets that this HBA has been + granted permission to connect. - The connection properties will need to have "target" values defined in - it which are expected to be tuples of (wwpn, lun). + For drivers that don't return an initiator to target map we try to find + the info for all the target ports. - :returns: List of lists with [c, t, l] entries, the channel and target - may be '-' wildcards if unable to determine them. + For drivers that return an initiator_target_map we use the + initiator_target_lun_map entry that was generated by the FC connector + based on the contents of the connection information data to know which + target ports to look for. + + :returns: 2-Tuple with the first entry being a list of [c, t, l] + entries where the target port was found, and the second entry of the + tuple being a set of luns for ports that were not found. """ - # We want the target's WWPNs, so we use the initiator_target_map if - # present for this hba or default to target_wwns if not present. + # We want the targets' WWPNs, so we use the initiator_target_map if + # present for this hba or default to targets if not present. targets = conn_props['targets'] if conn_props.get('initiator_target_map') is not None: + # This map we try to use was generated by the FC connector targets = conn_props['initiator_target_lun_map'].get( hba['port_name'], targets) @@ -61,6 +68,7 @@ class LinuxFibreChannel(linuxscsi.LinuxSCSI): path = '/sys/class/fc_transport/target%s:' % host_device ctls = [] + luns_not_found = set() for wwpn, lun in targets: cmd = 'grep -Gil "%(wwpns)s" %(path)s*/port_name' % {'wwpns': wwpn, 'path': path} @@ -73,48 +81,63 @@ class LinuxFibreChannel(linuxscsi.LinuxSCSI): LOG.debug('Could not get HBA channel and SCSI target ID, path:' ' %(path)s*, reason: %(reason)s', {'path': path, 'reason': exc}) - # If we didn't find any paths just give back wildcards for - # the channel and target ids. - ctls.append(['-', '-', lun]) - return ctls + # If we didn't find any paths add it to the not found list + luns_not_found.add(lun) + return ctls, luns_not_found def rescan_hosts(self, hbas, connection_properties): LOG.debug('Rescaning HBAs %(hbas)s with connection properties ' '%(conn_props)s', {'hbas': hbas, 'conn_props': connection_properties}) - get_ctsl = self._get_hba_channel_scsi_target - - # Use initiator_target_map provided by backend as HBA exclusion map + # Use initiator_target_lun_map (generated from initiator_target_map by + # the FC connector) as HBA exclusion map ports = connection_properties.get('initiator_target_lun_map') if ports: hbas = [hba for hba in hbas if hba['port_name'] in ports] - LOG.debug('Using initiator target map to exclude HBAs') + LOG.debug('Using initiator target map to exclude HBAs: %s', + hbas) - # Check if target implements single WWNN for all ports, if it does we - # only use HBAs connected (info was found), otherwise we are forced to - # blindly scan all HBAs. We also do this when we have - # initiator_target_map, because some drivers return all HBAs but they - # implement single WWNN and we avoid adding unrelated LUNs this way - # (LP bug#1765000). - with_info = [] - no_info = [] + # Most storage arrays get their target ports automatically detected + # by the Linux FC initiator and sysfs gets populated with that + # information, but there are some that don't. We'll do a narrow scan + # using the channel, target, and LUN for the former and a wider scan + # for the latter. If all paths to a former type of array were down on + # the system boot the array could look like it's of the latter type + # and make us bring us unwanted volumes into the system by doing a + # broad scan. To prevent this from happening Cinder drivers can use + # the "enable_wildcard_scan" key in the connection_info to let us know + # they don't want us to do broad scans even in those cases. + broad_scan = connection_properties.get('enable_wildcard_scan', True) + if not broad_scan: + LOG.debug('Connection info disallows broad SCSI scanning') + process = [] + skipped = [] + get_ctls = self._get_hba_channel_scsi_target_lun for hba in hbas: - ctls = get_ctsl(hba, connection_properties) - found_info = True - for hba_channel, target_id, target_lun in ctls: - if hba_channel == '-' or target_id == '-': - found_info = False - target_list = with_info if found_info else no_info - target_list.append((hba, ctls)) + ctls, luns_wildcards = get_ctls(hba, connection_properties) + # If we found the target ports, ignore HBAs that din't find them + if ctls: + process.append((hba, ctls)) - process = with_info or no_info - msg = "implements" if with_info else "doesn't implement" - LOG.debug('FC target %s single WWNN for all ports.', msg) + # If target ports not found and should have, then the HBA is not + # connected to our storage + elif not broad_scan: + LOG.debug('Skipping HBA %s, nothing to scan, target port ' + 'not connected to initiator', hba['node_name']) + + # If we haven't found any target ports we may need to do broad + # SCSI scans + elif not process: + skipped.append((hba, + [('-', '-', lun) for lun in luns_wildcards])) + + # If we didn't find any target ports use wildcards if they are enabled + process = process or skipped for hba, ctls in process: for hba_channel, target_id, target_lun in ctls: - LOG.debug('Scanning host %(host)s (wwnn: %(wwnn)s, c: ' + LOG.debug('Scanning %(host)s (wwnn: %(wwnn)s, c: ' '%(channel)s, t: %(target)s, l: %(lun)s)', {'host': hba['host_device'], 'wwnn': hba['node_name'], 'channel': hba_channel, diff --git a/os_brick/tests/initiator/test_linuxfc.py b/os_brick/tests/initiator/test_linuxfc.py index 0814bdb11..de67808e2 100644 --- a/os_brick/tests/initiator/test_linuxfc.py +++ b/os_brick/tests/initiator/test_linuxfc.py @@ -80,7 +80,7 @@ class LinuxFCTestCase(base.TestCase): del connection_properties['initiator_target_lun_map'] return hbas, connection_properties - def test__get_hba_channel_scsi_target_single_wwpn(self): + def test__get_hba_channel_scsi_target_lun_single_wwpn(self): execute_results = ('/sys/class/fc_transport/target6:0:1/port_name\n', '') hbas, con_props = self.__get_rescan_info() @@ -88,15 +88,15 @@ class LinuxFCTestCase(base.TestCase): con_props['targets'] = con_props['targets'][0:1] with mock.patch.object(self.lfc, '_execute', return_value=execute_results) as execute_mock: - res = self.lfc._get_hba_channel_scsi_target(hbas[0], con_props) + res = self.lfc._get_hba_channel_scsi_target_lun(hbas[0], con_props) execute_mock.assert_called_once_with( 'grep -Gil "514f0c50023f6c00" ' '/sys/class/fc_transport/target6:*/port_name', shell=True) - expected = [['0', '1', 1]] - self.assertListEqual(expected, res) + expected = ([['0', '1', 1]], set()) + self.assertEqual(expected, res) - def test__get_hba_channel_scsi_target_with_initiator_target_map(self): + def test__get_hba_channel_scsi_target_lun_with_initiator_target_map(self): execute_results = ('/sys/class/fc_transport/target6:0:1/port_name\n', '') hbas, con_props = self.__get_rescan_info(zone_manager=True) @@ -105,15 +105,16 @@ class LinuxFCTestCase(base.TestCase): hbas[0]['port_name'] = '50014380186af83e' with mock.patch.object(self.lfc, '_execute', return_value=execute_results) as execute_mock: - res = self.lfc._get_hba_channel_scsi_target(hbas[0], con_props) + res = self.lfc._get_hba_channel_scsi_target_lun(hbas[0], con_props) execute_mock.assert_called_once_with( 'grep -Gil "514f0c50023f6c01" ' '/sys/class/fc_transport/target6:*/port_name', shell=True) - expected = [['0', '1', 1]] - self.assertListEqual(expected, res) + expected = ([['0', '1', 1]], set()) + self.assertEqual(expected, res) - def test__get_hba_channel_scsi_target_with_initiator_target_map_none(self): + def test__get_hba_channel_scsi_target_lun_with_initiator_target_map_none( + self): execute_results = ('/sys/class/fc_transport/target6:0:1/port_name\n', '') hbas, con_props = self.__get_rescan_info() @@ -123,15 +124,15 @@ class LinuxFCTestCase(base.TestCase): hbas[0]['port_name'] = '50014380186af83e' with mock.patch.object(self.lfc, '_execute', return_value=execute_results) as execute_mock: - res = self.lfc._get_hba_channel_scsi_target(hbas[0], con_props) + res = self.lfc._get_hba_channel_scsi_target_lun(hbas[0], con_props) execute_mock.assert_called_once_with( 'grep -Gil "514f0c50023f6c00" ' '/sys/class/fc_transport/target6:*/port_name', shell=True) - expected = [['0', '1', 1]] - self.assertListEqual(expected, res) + expected = ([['0', '1', 1]], set()) + self.assertEqual(expected, res) - def test__get_hba_channel_scsi_target_multiple_wwpn(self): + def test__get_hba_channel_scsi_target_lun_multiple_wwpn(self): execute_results = [ ['/sys/class/fc_transport/target6:0:1/port_name\n', ''], ['/sys/class/fc_transport/target6:0:2/port_name\n', ''], @@ -139,7 +140,7 @@ class LinuxFCTestCase(base.TestCase): hbas, con_props = self.__get_rescan_info() with mock.patch.object(self.lfc, '_execute', side_effect=execute_results) as execute_mock: - res = self.lfc._get_hba_channel_scsi_target(hbas[0], con_props) + res = self.lfc._get_hba_channel_scsi_target_lun(hbas[0], con_props) expected_cmds = [ mock.call('grep -Gil "514f0c50023f6c00" ' '/sys/class/fc_transport/target6:*/port_name', @@ -150,10 +151,10 @@ class LinuxFCTestCase(base.TestCase): ] execute_mock.assert_has_calls(expected_cmds) - expected = [['0', '1', 1], ['0', '2', 1]] - self.assertListEqual(expected, res) + expected = ([['0', '1', 1], ['0', '2', 1]], set()) + self.assertEqual(expected, res) - def test__get_hba_channel_scsi_target_multiple_wwpn_and_luns(self): + def test__get_hba_channel_scsi_target_lun_multiple_wwpn_and_luns(self): execute_results = [ ['/sys/class/fc_transport/target6:0:1/port_name\n', ''], ['/sys/class/fc_transport/target6:0:2/port_name\n', ''], @@ -166,7 +167,7 @@ class LinuxFCTestCase(base.TestCase): ] with mock.patch.object(self.lfc, '_execute', side_effect=execute_results) as execute_mock: - res = self.lfc._get_hba_channel_scsi_target(hbas[0], con_props) + res = self.lfc._get_hba_channel_scsi_target_lun(hbas[0], con_props) expected_cmds = [ mock.call('grep -Gil "514f0c50023f6c00" ' '/sys/class/fc_transport/target6:*/port_name', @@ -177,48 +178,71 @@ class LinuxFCTestCase(base.TestCase): ] execute_mock.assert_has_calls(expected_cmds) - expected = [['0', '1', 1], ['0', '2', 7]] - self.assertListEqual(expected, res) + expected = ([['0', '1', 1], ['0', '2', 7]], set()) + self.assertEqual(expected, res) - def test__get_hba_channel_scsi_target_zone_manager(self): + def test__get_hba_channel_scsi_target_lun_zone_manager(self): execute_results = ('/sys/class/fc_transport/target6:0:1/port_name\n', '') hbas, con_props = self.__get_rescan_info(zone_manager=True) with mock.patch.object(self.lfc, '_execute', return_value=execute_results) as execute_mock: - res = self.lfc._get_hba_channel_scsi_target(hbas[0], con_props) + res = self.lfc._get_hba_channel_scsi_target_lun(hbas[0], con_props) execute_mock.assert_called_once_with( 'grep -Gil "514f0c50023f6c00" ' '/sys/class/fc_transport/target6:*/port_name', shell=True) - expected = [['0', '1', 1]] - self.assertListEqual(expected, res) + expected = ([['0', '1', 1]], set()) + self.assertEqual(expected, res) - def test__get_hba_channel_scsi_target_not_found(self): + def test__get_hba_channel_scsi_target_lun_not_found(self): hbas, con_props = self.__get_rescan_info(zone_manager=True) with mock.patch.object(self.lfc, '_execute', return_value=('', '')) as execute_mock: - res = self.lfc._get_hba_channel_scsi_target(hbas[0], con_props) + res = self.lfc._get_hba_channel_scsi_target_lun(hbas[0], con_props) execute_mock.assert_called_once_with( 'grep -Gil "514f0c50023f6c00" ' '/sys/class/fc_transport/target6:*/port_name', shell=True) - self.assertListEqual([], res) + self.assertEqual(([], set()), res) - def test__get_hba_channel_scsi_target_exception(self): + def test__get_hba_channel_scsi_target_lun_exception(self): hbas, con_props = self.__get_rescan_info(zone_manager=True) with mock.patch.object(self.lfc, '_execute', side_effect=Exception) as execute_mock: - res = self.lfc._get_hba_channel_scsi_target(hbas[0], con_props) + res = self.lfc._get_hba_channel_scsi_target_lun(hbas[0], con_props) execute_mock.assert_called_once_with( 'grep -Gil "514f0c50023f6c00" ' '/sys/class/fc_transport/target6:*/port_name', shell=True) - self.assertEqual(res, [['-', '-', 1]]) + self.assertEqual(([], {1}), res) + + def test__get_hba_channel_scsi_target_lun_some_exception(self): + execute_effects = [ + ('/sys/class/fc_transport/target6:0:1/port_name\n', ''), + Exception() + ] + expected_cmds = [ + mock.call('grep -Gil "514f0c50023f6c00" ' + '/sys/class/fc_transport/target6:*/port_name', + shell=True), + mock.call('grep -Gil "514f0c50023f6c01" ' + '/sys/class/fc_transport/target6:*/port_name', + shell=True), + ] + hbas, con_props = self.__get_rescan_info() + + with mock.patch.object(self.lfc, '_execute', + side_effect=execute_effects) as execute_mock: + res = self.lfc._get_hba_channel_scsi_target_lun(hbas[0], con_props) + execute_mock.assert_has_calls(expected_cmds) + expected = ([['0', '1', 1]], {1}) + self.assertEqual(expected, res) def test_rescan_hosts_initiator_map(self): """Test FC rescan with initiator map and not every HBA connected.""" - get_chan_results = [[['2', '3', 1], ['4', '5', 1]], [['6', '7', 1]]] + get_chan_results = [([['2', '3', 1], ['4', '5', 1]], set()), + ([['6', '7', 1]], set())] hbas, con_props = self.__get_rescan_info(zone_manager=True) @@ -232,7 +256,7 @@ class LinuxFCTestCase(base.TestCase): with mock.patch.object(self.lfc, '_execute', return_value=None) as execute_mock, \ - mock.patch.object(self.lfc, '_get_hba_channel_scsi_target', + mock.patch.object(self.lfc, '_get_hba_channel_scsi_target_lun', side_effect=get_chan_results) as mock_get_chan: self.lfc.rescan_hosts(hbas, con_props) @@ -257,9 +281,9 @@ class LinuxFCTestCase(base.TestCase): def test_rescan_hosts_single_wwnn(self): """Test FC rescan with no initiator map and single WWNN for ports.""" get_chan_results = [ - [['2', '3', 1], ['4', '5', 1]], - [['6', '7', 1]], - [['-', '-', 1]], + [[['2', '3', 1], ['4', '5', 1]], set()], + [[['6', '7', 1]], set()], + [[], {1}], ] hbas, con_props = self.__get_rescan_info(zone_manager=False) @@ -273,7 +297,7 @@ class LinuxFCTestCase(base.TestCase): with mock.patch.object(self.lfc, '_execute', return_value=None) as execute_mock, \ - mock.patch.object(self.lfc, '_get_hba_channel_scsi_target', + mock.patch.object(self.lfc, '_get_hba_channel_scsi_target_lun', side_effect=get_chan_results) as mock_get_chan: self.lfc.rescan_hosts(hbas, con_props) @@ -297,13 +321,14 @@ class LinuxFCTestCase(base.TestCase): def test_rescan_hosts_initiator_map_single_wwnn(self): """Test FC rescan with initiator map and single WWNN.""" - get_chan_results = [[['2', '3', 1], ['4', '5', 1]], [['-', '-', 1]]] + get_chan_results = [([['2', '3', 1], ['4', '5', 1]], set()), + ([], {1})] hbas, con_props = self.__get_rescan_info(zone_manager=True) with mock.patch.object(self.lfc, '_execute', return_value=None) as execute_mock, \ - mock.patch.object(self.lfc, '_get_hba_channel_scsi_target', + mock.patch.object(self.lfc, '_get_hba_channel_scsi_target_lun', side_effect=get_chan_results) as mock_get_chan: self.lfc.rescan_hosts(hbas, con_props) @@ -322,14 +347,14 @@ class LinuxFCTestCase(base.TestCase): mock.call(hbas[1], con_props)] mock_get_chan.assert_has_calls(expected_calls) - def test_rescan_hosts_wildcard(self): - """Test when we don't have initiator map or target is single WWNN.""" - get_chan_results = [[['-', '-', 1]], [['-', '-', 1]]] + def test_rescan_hosts_port_not_found(self): + """Test when we don't find the target ports.""" + get_chan_results = [([], {1}), ([], {1})] hbas, con_props = self.__get_rescan_info(zone_manager=True) # Remove the initiator map con_props.pop('initiator_target_map') con_props.pop('initiator_target_lun_map') - with mock.patch.object(self.lfc, '_get_hba_channel_scsi_target', + with mock.patch.object(self.lfc, '_get_hba_channel_scsi_target_lun', side_effect=get_chan_results), \ mock.patch.object(self.lfc, '_execute', side_effect=None) as execute_mock: @@ -343,10 +368,22 @@ class LinuxFCTestCase(base.TestCase): mock.call('tee', '-a', '/sys/class/scsi_host/host7/scan', process_input='- - 1', root_helper=None, run_as_root=True)] - execute_mock.assert_has_calls(expected_commands) self.assertEqual(len(expected_commands), execute_mock.call_count) + def test_rescan_hosts_port_not_found_driver_disables_wildcards(self): + """Test when we don't find the target ports but driver forces scan.""" + get_chan_results = [([], {1}), ([], {1})] + hbas, con_props = self.__get_rescan_info(zone_manager=True) + con_props['enable_wildcard_scan'] = False + with mock.patch.object(self.lfc, '_get_hba_channel_scsi_target_lun', + side_effect=get_chan_results), \ + mock.patch.object(self.lfc, '_execute', + side_effect=None) as execute_mock: + + self.lfc.rescan_hosts(hbas, con_props) + execute_mock.assert_not_called() + def test_get_fc_hbas_fail(self): def fake_exec1(a, b, c, d, run_as_root=True, root_helper='sudo'): raise OSError @@ -408,6 +445,7 @@ class LinuxFCTestCase(base.TestCase): expected_wwnns = ['50014380242b9750', '50014380242b9752'] self.assertEqual(expected_wwnns, wwnns) + SYSTOOL_FC = """ Class = "fc_host" @@ -517,6 +555,7 @@ class LinuxFCS390XTestCase(LinuxFCTestCase): '0.0.2319/0x50014380242b9751/unit_remove')] self.assertEqual(expected_commands, self.cmds) + SYSTOOL_FC_S390X = """ Class = "fc_host" diff --git a/releasenotes/notes/fix-fc-scan-too-broad-3c576e1846b7f05f.yaml b/releasenotes/notes/fix-fc-scan-too-broad-3c576e1846b7f05f.yaml new file mode 100644 index 000000000..6379ef979 --- /dev/null +++ b/releasenotes/notes/fix-fc-scan-too-broad-3c576e1846b7f05f.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fix an issue where SCSI LUN scans for FC were unnecessarily too broad. + Now OS-Brick will not use wildcards unless it doesn't find any target ports + in sysfs and the Cinder driver doesn't disable them.