Succeed on iSCSI detach when path just went down
If the iSCSI connection to a device goes down right after we flush it,
or if one of the paths of a multipath device goes down right before we
start disconnecting, the detach will fail even though it should succeed.
We'll see a VolumePathNotRemoved exception listing volumes that had not
disappeared.
This happens because, under those circumstances, it may take up to 30
seconds for the SCSI device to be removed from /dev, but we expect it to
disappear within 6 seconds (the first check happens immediately, then
another after 2 seconds, and another after 4 more seconds).
Since the device will be removed if we wait a bit more, this patch makes
it so that we wait for up to 30 seconds for the removal.
To ensure we wait as little time as possible, we change the way we wait
for the devices to be removed. Instead of checking, sleeping for 2 and
then for 4 seconds, and then checking again, we just sleep 500ms between
checks, and we do the DEBUG log every 5 seconds.
Change-Id: If801dfc2462c0d3f986eebd4108087139934610d
Closes-Bug: #1794829
(cherry-picked from b9c7bc2b59)
This commit is contained in:
parent
f2fed213f6
commit
b75411de2b
|
@ -19,10 +19,11 @@
|
||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import six
|
import time
|
||||||
|
|
||||||
from oslo_concurrency import processutils as putils
|
from oslo_concurrency import processutils as putils
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
|
import six
|
||||||
|
|
||||||
from os_brick import exception
|
from os_brick import exception
|
||||||
from os_brick import executor
|
from os_brick import executor
|
||||||
|
@ -76,18 +77,28 @@ class LinuxSCSI(executor.Executor):
|
||||||
with exc.context(force, 'Removing %s failed', device):
|
with exc.context(force, 'Removing %s failed', device):
|
||||||
self.echo_scsi_command(path, "1")
|
self.echo_scsi_command(path, "1")
|
||||||
|
|
||||||
@utils.retry(exceptions=exception.VolumePathNotRemoved)
|
|
||||||
def wait_for_volumes_removal(self, volumes_names):
|
def wait_for_volumes_removal(self, volumes_names):
|
||||||
"""Wait for device paths to be removed from the system."""
|
"""Wait for device paths to be removed from the system."""
|
||||||
str_names = ', '.join(volumes_names)
|
str_names = ', '.join(volumes_names)
|
||||||
LOG.debug('Checking to see if SCSI volumes %s have been removed.',
|
LOG.debug('Checking to see if SCSI volumes %s have been removed.',
|
||||||
str_names)
|
str_names)
|
||||||
exist = [volume_name for volume_name in volumes_names
|
exist = ['/dev/' + volume_name for volume_name in volumes_names]
|
||||||
if os.path.exists('/dev/' + volume_name)]
|
|
||||||
if exist:
|
# It can take up to 30 seconds to remove a SCSI device if the path
|
||||||
LOG.debug('%s still exist.', ', '.join(exist))
|
# failed right before we start detaching, which is unlikely, but we
|
||||||
raise exception.VolumePathNotRemoved(volume_path=exist)
|
# still shouldn't fail in that case.
|
||||||
LOG.debug("SCSI volumes %s have been removed.", str_names)
|
for i in range(61):
|
||||||
|
exist = [path for path in exist if os.path.exists(path)]
|
||||||
|
if not exist:
|
||||||
|
LOG.debug("SCSI volumes %s have been removed.", str_names)
|
||||||
|
return
|
||||||
|
# Don't sleep on the last try since we are quitting
|
||||||
|
if i < 60:
|
||||||
|
time.sleep(0.5)
|
||||||
|
# Log every 5 seconds
|
||||||
|
if i % 10 == 0:
|
||||||
|
LOG.debug('%s still exist.', ', '.join(exist))
|
||||||
|
raise exception.VolumePathNotRemoved(volume_path=exist)
|
||||||
|
|
||||||
def get_device_info(self, device):
|
def get_device_info(self, device):
|
||||||
(out, _err) = self._execute('sg_scan', device, run_as_root=True,
|
(out, _err) = self._execute('sg_scan', device, run_as_root=True,
|
||||||
|
|
|
@ -106,7 +106,7 @@ class LinuxSCSITestCase(base.TestCase):
|
||||||
@mock.patch('time.sleep')
|
@mock.patch('time.sleep')
|
||||||
@mock.patch('os.path.exists', return_value=True)
|
@mock.patch('os.path.exists', return_value=True)
|
||||||
def test_wait_for_volumes_removal_failure(self, exists_mock, sleep_mock):
|
def test_wait_for_volumes_removal_failure(self, exists_mock, sleep_mock):
|
||||||
retries = 3
|
retries = 61
|
||||||
names = ('sda', 'sdb')
|
names = ('sda', 'sdb')
|
||||||
self.assertRaises(exception.VolumePathNotRemoved,
|
self.assertRaises(exception.VolumePathNotRemoved,
|
||||||
self.linuxscsi.wait_for_volumes_removal, names)
|
self.linuxscsi.wait_for_volumes_removal, names)
|
||||||
|
|
Loading…
Reference in New Issue