diff --git a/os_brick/exception.py b/os_brick/exception.py index 49e5821a2..8120de0e2 100644 --- a/os_brick/exception.py +++ b/os_brick/exception.py @@ -98,5 +98,9 @@ class VolumeDeviceNotFound(BrickException): message = _("Volume device not found at %(device)s.") +class VolumePathNotRemoved(BrickException): + message = _("Volume path %(volume_path)s was not removed in time.") + + class ProtocolNotSupported(BrickException): message = _("Connect to volume via protocol %(protocol)s not supported.") diff --git a/os_brick/initiator/connector.py b/os_brick/initiator/connector.py index b584e9f98..4a5f46e27 100644 --- a/os_brick/initiator/connector.py +++ b/os_brick/initiator/connector.py @@ -412,6 +412,13 @@ class ISCSIConnector(InitiatorConnector): if dev_name: self._linuxscsi.remove_scsi_device(dev_name) + # NOTE(jdg): On busy systems we can have a race here + # where remove_iscsi_device is called before the device file + # has actually been removed. The result is an orphaned + # iscsi session that never gets logged out. The following + # call to wait addresses that issue. + self._linuxscsi.wait_for_volume_removal(host_device) + # NOTE(vish): Only disconnect from the target if no luns from the # target are in use. device_prefix = ("/dev/disk/by-path/ip-%(portal)s-iscsi-%(iqn)s-lun-" % diff --git a/os_brick/initiator/linuxscsi.py b/os_brick/initiator/linuxscsi.py index a529a6cdd..838dc1d73 100644 --- a/os_brick/initiator/linuxscsi.py +++ b/os_brick/initiator/linuxscsi.py @@ -22,8 +22,10 @@ import re from oslo_concurrency import processutils as putils from oslo_log import log as logging +from os_brick import exception from os_brick import executor from os_brick.i18n import _LW +from os_brick import utils LOG = logging.getLogger(__name__) @@ -67,6 +69,19 @@ class LinuxSCSI(executor.Executor): {'device': device, 'path': path}) self.echo_scsi_command(path, "1") + @utils.retry(exceptions=exception.VolumePathNotRemoved, retries=3, + backoff_rate=1) + def wait_for_volume_removal(self, volume_path): + """This is used to ensure that volumes are gone.""" + LOG.debug("Checking to see if SCSI volume %s has been removed.", + volume_path) + if os.path.exists(volume_path): + LOG.debug("%(path)s still exists.", {'path': volume_path}) + raise exception.VolumePathNotRemoved( + volume_path=volume_path) + else: + LOG.debug("SCSI volume %s has been removed.", volume_path) + def get_device_info(self, device): (out, _err) = self._execute('sg_scan', device, run_as_root=True, root_helper=self._root_helper) diff --git a/os_brick/tests/initiator/test_connector.py b/os_brick/tests/initiator/test_connector.py index b93107eb9..3039c942d 100644 --- a/os_brick/tests/initiator/test_connector.py +++ b/os_brick/tests/initiator/test_connector.py @@ -234,9 +234,13 @@ class ISCSIConnectorTestCase(ConnectorTestCase): initiator = self.connector.get_initiator() self.assertEqual(initiator, 'iqn.1234-56.foo.bar:01:23456789abc') - @mock.patch.object(os.path, 'exists', return_value=True) def _test_connect_volume(self, extra_props, additional_commands, - exists_mock): + disconnect_mock=None): + # for making sure the /dev/disk/by-path is gone + exists_mock = mock.Mock() + exists_mock.return_value = True + os.path.exists = exists_mock + location = '10.0.2.15:3260' name = 'volume-00000001' iqn = 'iqn.2010-10.org.openstack:%s' % name @@ -249,29 +253,40 @@ class ISCSIConnectorTestCase(ConnectorTestCase): self.assertEqual(device['type'], 'block') self.assertEqual(device['path'], dev_str) - self.connector.disconnect_volume(connection_info['data'], device) - expected_commands = [('iscsiadm -m node -T %s -p %s' % - (iqn, location)), - ('iscsiadm -m session'), - ('iscsiadm -m node -T %s -p %s --login' % - (iqn, location)), - ('iscsiadm -m node -T %s -p %s --op update' - ' -n node.startup -v automatic' - % (iqn, location)), - ('iscsiadm -m node --rescan'), - ('iscsiadm -m session --rescan'), - ('blockdev --flushbufs /dev/sdb'), - ('tee -a /sys/block/sdb/device/delete'), - ('iscsiadm -m node -T %s -p %s --op update' - ' -n node.startup -v manual' % (iqn, location)), - ('iscsiadm -m node -T %s -p %s --logout' % - (iqn, location)), - ('iscsiadm -m node -T %s -p %s --op delete' % - (iqn, location)), ] + additional_commands - LOG.debug("self.cmds = %s" % self.cmds) - LOG.debug("expected = %s" % expected_commands) + self.count = 0 - self.assertEqual(expected_commands, self.cmds) + def mock_exists_effect(*args, **kwargs): + self.count = self.count + 1 + if self.count == 4: + return False + else: + return True + + if disconnect_mock is None: + disconnect_mock = mock_exists_effect + + with mock.patch.object(os.path, 'exists', + side_effect=disconnect_mock): + self.connector.disconnect_volume(connection_info['data'], device) + expected_commands = [ + ('iscsiadm -m node -T %s -p %s' % (iqn, location)), + ('iscsiadm -m session'), + ('iscsiadm -m node -T %s -p %s --login' % (iqn, location)), + ('iscsiadm -m node -T %s -p %s --op update' + ' -n node.startup -v automatic' % (iqn, location)), + ('iscsiadm -m node --rescan'), + ('iscsiadm -m session --rescan'), + ('blockdev --flushbufs /dev/sdb'), + ('tee -a /sys/block/sdb/device/delete'), + ('iscsiadm -m node -T %s -p %s --op update' + ' -n node.startup -v manual' % (iqn, location)), + ('iscsiadm -m node -T %s -p %s --logout' % (iqn, location)), + ('iscsiadm -m node -T %s -p %s --op delete' % + (iqn, location)), ] + additional_commands + LOG.debug("self.cmds = %s", self.cmds) + LOG.debug("expected = %s", expected_commands) + + self.assertEqual(expected_commands, self.cmds) @testtools.skipUnless(os.path.exists('/dev/disk/by-path'), 'Test requires /dev/disk/by-path') @@ -297,7 +312,20 @@ class ISCSIConnectorTestCase(ConnectorTestCase): (iqn2, location2)), ('iscsiadm -m node -T %s -p %s --op delete' % (iqn2, location2))] - self._test_connect_volume(extra_props, additional_commands) + + def mock_exists_effect(*args, **kwargs): + self.count = self.count + 1 + # we have 2 targets in this test, so we need + # to make sure we remove and detect removal + # for both. + if (self.count == 4 or + self.count == 8): + return False + else: + return True + + self._test_connect_volume(extra_props, additional_commands, + disconnect_mock=mock_exists_effect) @testtools.skipUnless(os.path.exists('/dev/disk/by-path'), 'Test requires /dev/disk/by-path') @@ -338,17 +366,19 @@ class ISCSIConnectorTestCase(ConnectorTestCase): check_exit_code=[0, 255]) mock_iscsiadm.reset_mock() - self.connector.disconnect_volume(connection_info['data'], device) - props = connection_info['data'].copy() - for key in ('target_portals', 'target_iqns', 'target_luns'): - props.pop(key, None) - mock_iscsiadm.assert_any_call(props, ('--logout',), - check_exit_code=[0, 21, 255]) - props['target_portal'] = location2 - props['target_iqn'] = iqn2 - props['target_lun'] = 2 - mock_iscsiadm.assert_any_call(props, ('--logout',), - check_exit_code=[0, 21, 255]) + with mock.patch.object(os.path, 'exists', + return_value=False): + self.connector.disconnect_volume(connection_info['data'], device) + props = connection_info['data'].copy() + for key in ('target_portals', 'target_iqns', 'target_luns'): + props.pop(key, None) + mock_iscsiadm.assert_any_call(props, ('--logout',), + check_exit_code=[0, 21, 255]) + props['target_portal'] = location2 + props['target_iqn'] = iqn2 + props['target_lun'] = 2 + mock_iscsiadm.assert_any_call(props, ('--logout',), + check_exit_code=[0, 21, 255]) @mock.patch.object(connector.ISCSIConnector, '_run_iscsiadm_bare') @mock.patch.object(connector.ISCSIConnector, diff --git a/os_brick/tests/initiator/test_linuxscsi.py b/os_brick/tests/initiator/test_linuxscsi.py index cfc9fd05e..0d19e06eb 100644 --- a/os_brick/tests/initiator/test_linuxscsi.py +++ b/os_brick/tests/initiator/test_linuxscsi.py @@ -19,6 +19,7 @@ import string import mock from oslo_log import log as logging +from os_brick import exception from os_brick.initiator import linuxscsi from os_brick.tests import base @@ -67,6 +68,22 @@ class LinuxSCSITestCase(base.TestCase): ('tee -a /sys/block/sdc/device/delete')] self.assertEqual(expected_commands, self.cmds) + def test_wait_for_volume_removal(self): + fake_path = '/dev/disk/by-path/fake-iscsi-iqn-lun-0' + exists_mock = mock.Mock() + exists_mock.return_value = True + os.path.exists = exists_mock + self.assertRaises(exception.VolumePathNotRemoved, + self.linuxscsi.wait_for_volume_removal, + fake_path) + + exists_mock = mock.Mock() + exists_mock.return_value = False + os.path.exists = exists_mock + self.linuxscsi.wait_for_volume_removal(fake_path) + expected_commands = [] + self.assertEqual(expected_commands, self.cmds) + def test_flush_multipath_device(self): self.linuxscsi.flush_multipath_device('/dev/dm-9') expected_commands = [('multipath -f /dev/dm-9')] diff --git a/os_brick/tests/test_utils.py b/os_brick/tests/test_utils.py new file mode 100644 index 000000000..01229f33c --- /dev/null +++ b/os_brick/tests/test_utils.py @@ -0,0 +1,111 @@ +# (c) Copyright 2015 Hewlett-Packard Development Company, L.P. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import time + +import mock +from oslo_log import log as logging + +from os_brick import exception +from os_brick.tests import base +from os_brick import utils + + +LOG = logging.getLogger(__name__) + + +class WrongException(exception.BrickException): + pass + + +class TestRetryDecorator(base.TestCase): + + def test_no_retry_required(self): + self.counter = 0 + + with mock.patch.object(time, 'sleep') as mock_sleep: + @utils.retry(exceptions=exception.VolumeDeviceNotFound, + interval=2, + retries=3, + backoff_rate=2) + def succeeds(): + self.counter += 1 + return 'success' + + ret = succeeds() + self.assertFalse(mock_sleep.called) + self.assertEqual(ret, 'success') + self.assertEqual(self.counter, 1) + + def test_retries_once(self): + self.counter = 0 + interval = 2 + backoff_rate = 2 + retries = 3 + + with mock.patch.object(time, 'sleep') as mock_sleep: + @utils.retry(exception.VolumeDeviceNotFound, + interval, + retries, + backoff_rate) + def fails_once(): + self.counter += 1 + if self.counter < 2: + raise exception.VolumeDeviceNotFound(device='fake') + else: + return 'success' + + ret = fails_once() + self.assertEqual(ret, 'success') + self.assertEqual(self.counter, 2) + self.assertEqual(mock_sleep.call_count, 1) + mock_sleep.assert_called_with(interval * backoff_rate) + + def test_limit_is_reached(self): + self.counter = 0 + retries = 3 + interval = 2 + backoff_rate = 4 + + with mock.patch.object(time, 'sleep') as mock_sleep: + @utils.retry(exception.VolumeDeviceNotFound, + interval, + retries, + backoff_rate) + def always_fails(): + self.counter += 1 + raise exception.VolumeDeviceNotFound(device='fake') + + self.assertRaises(exception.VolumeDeviceNotFound, + always_fails) + self.assertEqual(retries, self.counter) + + expected_sleep_arg = [] + + for i in xrange(retries): + if i > 0: + interval *= backoff_rate + expected_sleep_arg.append(float(interval)) + + mock_sleep.assert_has_calls(map(mock.call, expected_sleep_arg)) + + def test_wrong_exception_no_retry(self): + + with mock.patch.object(time, 'sleep') as mock_sleep: + @utils.retry(exceptions=exception.VolumeDeviceNotFound) + def raise_unexpected_error(): + raise WrongException("wrong exception") + + self.assertRaises(WrongException, raise_unexpected_error) + self.assertFalse(mock_sleep.called) diff --git a/os_brick/utils.py b/os_brick/utils.py new file mode 100644 index 000000000..5bd7f612b --- /dev/null +++ b/os_brick/utils.py @@ -0,0 +1,58 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +"""Utilities and helper functions.""" + +from oslo_log import log as logging +import retrying +import six + +from os_brick.i18n import _ + + +LOG = logging.getLogger(__name__) + + +def retry(exceptions, interval=1, retries=3, backoff_rate=2): + + def _retry_on_exception(e): + return isinstance(e, exceptions) + + def _backoff_sleep(previous_attempt_number, delay_since_first_attempt_ms): + exp = backoff_rate ** previous_attempt_number + wait_for = max(0, interval * exp) + LOG.debug("Sleeping for %s seconds", wait_for) + return wait_for * 1000.0 + + def _print_stop(previous_attempt_number, delay_since_first_attempt_ms): + delay_since_first_attempt = delay_since_first_attempt_ms / 1000.0 + LOG.debug("Failed attempt %s", previous_attempt_number) + LOG.debug("Have been at this for %s seconds", + delay_since_first_attempt) + return previous_attempt_number == retries + + if retries < 1: + raise ValueError(_('Retries must be greater than or ' + 'equal to 1 (received: %s). ') % retries) + + def _decorator(f): + + @six.wraps(f) + def _wrapper(*args, **kwargs): + r = retrying.Retrying(retry_on_exception=_retry_on_exception, + wait_func=_backoff_sleep, + stop_func=_print_stop) + return r.call(f, *args, **kwargs) + + return _wrapper + + return _decorator diff --git a/requirements.txt b/requirements.txt index 783b51d17..267a4f548 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,5 @@ oslo.log>=1.0.0,<1.1.0 oslo.serialization>=1.4.0,<1.5.0 oslo.i18n>=1.5.0,<1.6.0 oslo.utils>=1.4.0,<1.5.0 +retrying>=1.2.3,!=1.3.0 six>=1.9.0