From 5c031cb2d9e9b10a33c82586d11139594c96a25f Mon Sep 17 00:00:00 2001 From: Takashi Kajinami Date: Mon, 12 Apr 2021 14:07:46 +0900 Subject: [PATCH] multipath/iscsi: remove devices from multipath monitoring Recent multipathd doesn't remove path devices timely when it receives burst of udev events but wait for a while to start actual removal. Because os-brick removes path devices in a short time during detaching a multipath device, it is likely to hit this burst limit and sometimes path devices are not removed before a subsequent operation is started. This change ensures that os-brick tells mutlipathd to remove path devices when the devices should be deleted, so that orphan paths are not left when starting a subsequent attach operation. Closes-Bug: #1924652 Change-Id: I65204aa7495740dc1545bff2c5c485a8041e7930 (cherry picked from commit 1b2e2295421615847d86508dcd487ec51fa45f25) (cherry picked from commit 0cd58a9b0a5520e184a7d3ef45f03ed8cda93732) (cherry picked from commit a0e995b1308b9a188e8719fdc6d6bad15508dba6) --- os_brick/initiator/linuxscsi.py | 20 +++++ os_brick/tests/initiator/test_linuxscsi.py | 87 +++++++++++++++++-- .../notes/bug-1924652-2323f905f62ef8ba.yaml | 8 ++ 3 files changed, 108 insertions(+), 7 deletions(-) create mode 100644 releasenotes/notes/bug-1924652-2323f905f62ef8ba.yaml diff --git a/os_brick/initiator/linuxscsi.py b/os_brick/initiator/linuxscsi.py index 480503f70..87be4a3ae 100644 --- a/os_brick/initiator/linuxscsi.py +++ b/os_brick/initiator/linuxscsi.py @@ -304,9 +304,21 @@ class LinuxSCSI(executor.Executor): with exc.context(force, 'Flushing %s failed', multipath_name): self.flush_multipath_device(multipath_name) multipath_name = None + multipath_running = True + else: + multipath_running = self.is_multipath_running( + enforce_multipath=False, root_helper=self._root_helper) for device_name in devices_names: dev_path = '/dev/' + device_name + if multipath_running: + # Recent multipathd doesn't remove path devices in time when + # it receives mutiple udev events in a short span, so here we + # tell multipathd to remove the path device immediately. + # Even if this step fails, later removing an iscsi device + # triggers a udev event and multipathd can remove the path + # device based on the udev event + self.multipath_del_path(dev_path) flush = self.requires_flush(dev_path, path_used, was_multipath) self.remove_scsi_device(dev_path, force, exc, flush) @@ -715,3 +727,11 @@ class LinuxSCSI(executor.Executor): check_exit_code=False, root_helper=self._root_helper) return stdout.strip() == 'ok' + + def multipath_del_path(self, realpath): + """Remove a path from multipathd for monitoring.""" + stdout, stderr = self._execute('multipathd', 'del', 'path', realpath, + run_as_root=True, timeout=5, + check_exit_code=False, + root_helper=self._root_helper) + return stdout.strip() == 'ok' diff --git a/os_brick/tests/initiator/test_linuxscsi.py b/os_brick/tests/initiator/test_linuxscsi.py index 75d4a7d40..772a189c2 100644 --- a/os_brick/tests/initiator/test_linuxscsi.py +++ b/os_brick/tests/initiator/test_linuxscsi.py @@ -230,15 +230,20 @@ class LinuxSCSITestCase(base.TestCase): {'do_raise': True, 'force': True}) @ddt.unpack @mock.patch.object(linuxscsi.LinuxSCSI, '_remove_scsi_symlinks') + @mock.patch.object(linuxscsi.LinuxSCSI, 'multipath_del_path') + @mock.patch.object(linuxscsi.LinuxSCSI, 'is_multipath_running', + return_value=True) @mock.patch.object(linuxscsi.LinuxSCSI, 'flush_multipath_device') @mock.patch.object(linuxscsi.LinuxSCSI, 'get_dm_name') @mock.patch.object(linuxscsi.LinuxSCSI, 'find_sysfs_multipath_dm') @mock.patch.object(linuxscsi.LinuxSCSI, 'wait_for_volumes_removal') - @mock.patch('os_brick.initiator.linuxscsi.LinuxSCSI.remove_scsi_device') + @mock.patch.object(linuxscsi.LinuxSCSI, 'remove_scsi_device') def test_remove_connection_multipath_complete(self, remove_mock, wait_mock, find_dm_mock, get_dm_name_mock, flush_mp_mock, + is_mp_running_mock, + mp_del_path_mock, remove_link_mock, do_raise, force): if do_raise: @@ -253,6 +258,9 @@ class LinuxSCSITestCase(base.TestCase): flush_mp_mock.assert_called_once_with(get_dm_name_mock.return_value) self.assertEqual(get_dm_name_mock.return_value if do_raise else None, mp_name) + is_mp_running_mock.assert_not_called() + mp_del_path_mock.assert_has_calls([ + mock.call('/dev/sda'), mock.call('/dev/sdb')]) remove_mock.assert_has_calls([ mock.call('/dev/sda', mock.sentinel.Force, exc, False), mock.call('/dev/sdb', mock.sentinel.Force, exc, False)]) @@ -261,6 +269,46 @@ class LinuxSCSITestCase(base.TestCase): remove_link_mock.assert_called_once_with(devices_names) @mock.patch.object(linuxscsi.LinuxSCSI, '_remove_scsi_symlinks') + @mock.patch.object(linuxscsi.LinuxSCSI, 'multipath_del_path') + @mock.patch.object(linuxscsi.LinuxSCSI, 'is_multipath_running', + return_value=True) + @mock.patch.object(linuxscsi.LinuxSCSI, 'flush_multipath_device') + @mock.patch.object(linuxscsi.LinuxSCSI, 'get_dm_name') + @mock.patch.object(linuxscsi.LinuxSCSI, 'find_sysfs_multipath_dm', + return_value=None) + @mock.patch.object(linuxscsi.LinuxSCSI, 'wait_for_volumes_removal') + @mock.patch.object(linuxscsi.LinuxSCSI, 'remove_scsi_device') + def test_remove_connection_multipath_complete_no_dm(self, remove_mock, + wait_mock, + find_dm_mock, + get_dm_name_mock, + flush_mp_mock, + is_mp_running_mock, + mp_del_path_mock, + remove_link_mock): + devices_names = ('sda', 'sdb') + exc = exception.ExceptionChainer() + mp_name = self.linuxscsi.remove_connection(devices_names, + force=mock.sentinel.Force, + exc=exc) + find_dm_mock.assert_called_once_with(devices_names) + get_dm_name_mock.assert_not_called() + flush_mp_mock.assert_not_called() + self.assertIsNone(mp_name) + is_mp_running_mock.assert_called_once() + mp_del_path_mock.assert_has_calls([ + mock.call('/dev/sda'), mock.call('/dev/sdb')]) + remove_mock.assert_has_calls([ + mock.call('/dev/sda', mock.sentinel.Force, exc, False), + mock.call('/dev/sdb', mock.sentinel.Force, exc, False)]) + wait_mock.assert_called_once_with(devices_names) + self.assertFalse(bool(exc)) + remove_link_mock.assert_called_once_with(devices_names) + + @mock.patch.object(linuxscsi.LinuxSCSI, '_remove_scsi_symlinks') + @mock.patch.object(linuxscsi.LinuxSCSI, 'multipath_del_path') + @mock.patch.object(linuxscsi.LinuxSCSI, 'is_multipath_running', + return_value=True) @mock.patch.object(linuxscsi.LinuxSCSI, 'flush_multipath_device', side_effect=Exception) @mock.patch.object(linuxscsi.LinuxSCSI, 'get_dm_name') @@ -269,7 +317,10 @@ class LinuxSCSITestCase(base.TestCase): @mock.patch.object(linuxscsi.LinuxSCSI, 'remove_scsi_device') def test_remove_connection_multipath_fail(self, remove_mock, wait_mock, find_dm_mock, get_dm_name_mock, - flush_mp_mock, remove_link_mock): + flush_mp_mock, + is_mp_running_mock, + mp_del_path_mock, + remove_link_mock): flush_mp_mock.side_effect = exception.ExceptionChainer devices_names = ('sda', 'sdb') exc = exception.ExceptionChainer() @@ -279,18 +330,25 @@ class LinuxSCSITestCase(base.TestCase): find_dm_mock.assert_called_once_with(devices_names) get_dm_name_mock.assert_called_once_with(find_dm_mock.return_value) flush_mp_mock.assert_called_once_with(get_dm_name_mock.return_value) + is_mp_running_mock.assert_not_called() + mp_del_path_mock.assert_not_called() remove_mock.assert_not_called() wait_mock.assert_not_called() remove_link_mock.assert_not_called() self.assertTrue(bool(exc)) - @mock.patch.object(linuxscsi.LinuxSCSI, 'find_sysfs_multipath_dm') @mock.patch.object(linuxscsi.LinuxSCSI, '_remove_scsi_symlinks') + @mock.patch.object(linuxscsi.LinuxSCSI, 'multipath_del_path') + @mock.patch.object(linuxscsi.LinuxSCSI, 'is_multipath_running', + return_value=True) + @mock.patch.object(linuxscsi.LinuxSCSI, 'find_sysfs_multipath_dm') @mock.patch.object(linuxscsi.LinuxSCSI, 'wait_for_volumes_removal') @mock.patch.object(linuxscsi.LinuxSCSI, 'remove_scsi_device') def test_remove_connection_singlepath_no_path(self, remove_mock, wait_mock, - remove_link_mock, - find_dm_mock): + find_dm_mock, + is_mp_running_mock, + mp_del_path_mock, + remove_link_mock): # Test remove connection when we didn't form a multipath and didn't # even use any of the devices that were found. This means that we # don't flush any of the single paths when removing them. @@ -301,18 +359,27 @@ class LinuxSCSITestCase(base.TestCase): force=mock.sentinel.Force, exc=exc) find_dm_mock.assert_called_once_with(devices_names) + is_mp_running_mock.assert_called_once() + mp_del_path_mock.assert_has_calls([ + mock.call('/dev/sda'), mock.call('/dev/sdb')]) remove_mock.assert_has_calls( [mock.call('/dev/sda', mock.sentinel.Force, exc, False), mock.call('/dev/sdb', mock.sentinel.Force, exc, False)]) wait_mock.assert_called_once_with(devices_names) remove_link_mock.assert_called_once_with(devices_names) - @mock.patch.object(linuxscsi.LinuxSCSI, 'find_sysfs_multipath_dm') @mock.patch.object(linuxscsi.LinuxSCSI, '_remove_scsi_symlinks') + @mock.patch.object(linuxscsi.LinuxSCSI, 'multipath_del_path') + @mock.patch.object(linuxscsi.LinuxSCSI, 'is_multipath_running', + return_value=False) + @mock.patch.object(linuxscsi.LinuxSCSI, 'find_sysfs_multipath_dm') @mock.patch.object(linuxscsi.LinuxSCSI, 'wait_for_volumes_removal') @mock.patch.object(linuxscsi.LinuxSCSI, 'remove_scsi_device') def test_remove_connection_singlepath_used(self, remove_mock, wait_mock, - remove_link_mock, find_dm_mock): + find_dm_mock, + is_mp_running_mock, + mp_del_path_mock, + remove_link_mock): # Test remove connection when we didn't form a multipath and just used # one of the single paths that were found. This means that we don't # flush any of the single paths when removing them. @@ -327,6 +394,8 @@ class LinuxSCSITestCase(base.TestCase): exc=exc, path_used='/dev/sdb', was_multipath=False) find_dm_mock.assert_called_once_with(devices_names) + is_mp_running_mock.assert_called_once() + mp_del_path_mock.assert_not_called() remove_mock.assert_has_calls( [mock.call('/dev/sda', mock.sentinel.Force, exc, False), mock.call('/dev/sdb', mock.sentinel.Force, exc, True)]) @@ -1122,6 +1191,10 @@ loop0 0""" self.linuxscsi.multipath_add_path('/dev/sda') self.assertEqual(['multipathd add path /dev/sda'], self.cmds) + def test_multipath_del_path(self): + self.linuxscsi.multipath_del_path('/dev/sda') + self.assertEqual(['multipathd del path /dev/sda'], self.cmds) + @ddt.data({'con_props': {}, 'dev_info': {'path': mock.sentinel.path}}, {'con_props': None, 'dev_info': {'path': mock.sentinel.path}}, {'con_props': {'device_path': mock.sentinel.device_path}, diff --git a/releasenotes/notes/bug-1924652-2323f905f62ef8ba.yaml b/releasenotes/notes/bug-1924652-2323f905f62ef8ba.yaml new file mode 100644 index 000000000..a6aad780f --- /dev/null +++ b/releasenotes/notes/bug-1924652-2323f905f62ef8ba.yaml @@ -0,0 +1,8 @@ +--- +fixes: + - | + `Bug #1924652 `_: Fix + issue with newer multipathd implementations where path devices are kept + in multipathd even after volume detachment completes, preventing it from + creating a multipath device when a new device attachment is made shortly + with the same volume device or the same device path.