Adds retry function to HNAS driver
Multiple requests sent to HNAS can cause concurrency problems that end up as 'SSC failed connection' errors. This patch adds a retry decorator to _execute in the HDS HNAS Manila driver to fix this problem.

The current retry functionality in Manila always uses fixed values to define how long to wait before the next attempt. This can make the retries of multiple requests collide on every attempt, since they all wait the same amount of time and then try to use the same resource together again. So, additionally, this patch changes the behavior of manila.utils.retry() to accept a parameter that makes the function use randomly generated wait intervals.

Change-Id: Ib862f62517fcc8816781204b902119e9b20121e0
Closes-bug: 1491550
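For illustration only (not part of this change), a minimal sketch of how a backend method could opt into the new behavior; the class name and the interval/retries/backoff_rate values below are made up, only utils.retry() and exception.HNASConnException come from the patch:

    from manila import exception
    from manila import utils


    class FakeBackend(object):
        """Hypothetical backend, used only to illustrate the decorator."""

        # Retry on HNASConnException. With wait_random=True each attempt
        # sleeps for a randomly chosen interval instead of the fixed
        # exponential value, so concurrent callers are less likely to
        # retry at the same moment.
        @utils.retry(exception=exception.HNASConnException,
                     interval=1, retries=5, backoff_rate=2, wait_random=True)
        def _execute(self, commands):
            # Run the 'ssc' command over SSH and raise HNASConnException on
            # 'Failed to establish SSC connection' so the decorator retries.
            pass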
parent 6977b0196d
commit 8dc3863e10
@@ -659,6 +659,10 @@ class HNASBackendException(ManilaException):
     message = _("HNAS Backend Exception: %(msg)s")
 
 
+class HNASConnException(ManilaException):
+    message = _("HNAS Connection Exception: %(msg)s")
+
+
 # ConsistencyGroup
 class ConsistencyGroupNotFound(NotFound):
     message = _("ConsistencyGroup %(consistency_group_id)s could not be "
@@ -414,6 +414,7 @@ class HNASSSHBackend(object):
             msg = (_("Share %s was not created.") % share['id'])
             raise exception.HNASBackendException(msg=msg)
 
+    @mutils.retry(exception=exception.HNASConnException, wait_random=True)
     def _execute(self, commands):
         command = ['ssc', '127.0.0.1']
         if self.admin_ip0 is not None:
@@ -442,13 +443,19 @@ class HNASSSHBackend(object):
                                'out': out, 'err': err})
                 return out, err
             except processutils.ProcessExecutionError as e:
-                LOG.debug("Command %(cmd)s result: out = %(out)s - err = "
-                          "%(err)s - exit = %(exit)s.", {'cmd': e.cmd,
-                                                         'out': e.stdout,
-                                                         'err': e.stderr,
-                                                         'exit': e.exit_code})
-                LOG.error(_LE("Error running SSH command."))
-                raise
+                if 'Failed to establish SSC connection' in e.stderr:
+                    LOG.debug("SSC connection error!")
+                    msg = _("Failed to establish SSC connection.")
+                    raise exception.HNASConnException(msg=msg)
+                else:
+                    LOG.debug("Command %(cmd)s result: out = %(out)s - err = "
+                              "%(err)s - exit = %(exit)s.", {'cmd': e.cmd,
+                                                             'out': e.stdout,
+                                                             'err': e.stderr,
+                                                             'exit':
+                                                                 e.exit_code})
+                    LOG.error(_LE("Error running SSH command."))
+                    raise
 
     def _check_fs_mounted(self, fs_name):
         self._check_fs()
@@ -23,6 +23,7 @@ import paramiko
 from manila import exception
 from manila.share.drivers.hitachi import ssh
 from manila import test
+from manila import utils
 
 CONF = cfg.CONF
 
@@ -69,7 +70,6 @@ file_system 1055 fake_span Umount 2 4 5
 file_system2 1050 fake_span2 NoEVS - 10 0 1
 fake_fs 1051 fake_span Umount 2 100 1024 """
 
-
 HNAS_RESULT_one_fs = """ \
 Instance name Dev On span State EVS Cap/GiB Confined Flag
 ----------------- ---- ------- ------ --- ------- -------- ----
@@ -1170,6 +1170,31 @@ class HNASSSHTestCase(test.TestCase):
                                           port=self.port)
         self.assertIn('Request submitted successfully.', output)
 
+    def test__execute_retry(self):
+        commands = ['tree-clone-job-submit', '-e', '/src', '/dst']
+        concat_command = ('ssc --smuauth fake console-context --evs 2 '
+                          'tree-clone-job-submit -e /src /dst')
+        msg = 'Failed to establish SSC connection'
+
+        item_mock = mock.Mock()
+        self.mock_object(utils.pools.Pool, 'item',
+                         mock.Mock(return_value=item_mock))
+        setattr(item_mock, '__enter__', mock.Mock())
+        setattr(item_mock, '__exit__', mock.Mock())
+
+        self.mock_object(paramiko.SSHClient, 'connect')
+        # testing retrying 3 times
+        self.mock_object(putils, 'ssh_execute', mock.Mock(
+            side_effect=[putils.ProcessExecutionError(stderr=msg),
+                         putils.ProcessExecutionError(stderr=msg),
+                         putils.ProcessExecutionError(stderr=msg),
+                         (HNAS_RESULT_job, '')]))
+
+        self._driver._execute(commands)
+
+        putils.ssh_execute.assert_called_with(mock.ANY, concat_command,
+                                              check_exit_code=True)
+
     def test__execute_ssh_exception(self):
         key = self.ssh_private_key
         commands = ['tree-clone-job-submit', '-e', '/src', '/dst']
@@ -1177,7 +1202,8 @@
                           'tree-clone-job-submit -e /src /dst')
         self.mock_object(paramiko.SSHClient, 'connect')
         self.mock_object(putils, 'ssh_execute',
-                         mock.Mock(side_effect=putils.ProcessExecutionError))
+                         mock.Mock(side_effect=putils.ProcessExecutionError
+                                   (stderr='Error')))
 
         self.assertRaises(putils.ProcessExecutionError,
                           self._driver._execute, commands)
@@ -694,6 +694,49 @@ class TestRetryDecorator(test.TestCase):
             self.assertEqual('success', ret)
             self.assertEqual(1, self.counter)
 
+    def test_no_retry_required_random(self):
+        self.counter = 0
+
+        with mock.patch.object(time, 'sleep') as mock_sleep:
+            @utils.retry(exception.ManilaException,
+                         interval=2,
+                         retries=3,
+                         backoff_rate=2,
+                         wait_random=True)
+            def succeeds():
+                self.counter += 1
+                return 'success'
+
+            ret = succeeds()
+            self.assertFalse(mock_sleep.called)
+            self.assertEqual('success', ret)
+            self.assertEqual(1, self.counter)
+
+    def test_retries_once_random(self):
+        self.counter = 0
+        interval = 2
+        backoff_rate = 2
+        retries = 3
+
+        with mock.patch.object(time, 'sleep') as mock_sleep:
+            @utils.retry(exception.ManilaException,
+                         interval,
+                         retries,
+                         backoff_rate,
+                         wait_random=True)
+            def fails_once():
+                self.counter += 1
+                if self.counter < 2:
+                    raise exception.ManilaException(data='fake')
+                else:
+                    return 'success'
+
+            ret = fails_once()
+            self.assertEqual('success', ret)
+            self.assertEqual(2, self.counter)
+            self.assertEqual(1, mock_sleep.call_count)
+            self.assertTrue(mock_sleep.called)
+
     def test_retries_once(self):
         self.counter = 0
         interval = 2
@@ -22,6 +22,7 @@ import errno
 import inspect
 import os
 import pyclbr
+import random
 import re
 import shutil
 import socket
@@ -540,23 +541,25 @@ class ComparableMixin(object):
         return self._compare(other, lambda s, o: s != o)
 
 
-def retry(exception, interval=1, retries=10, backoff_rate=2):
+def retry(exception, interval=1, retries=10, backoff_rate=2,
+          wait_random=False):
     """A wrapper around retrying library.
 
-    This decorator allows to log and to check 'retries' input param.
-    Time interval between retries is calculated in the following way:
-    interval * backoff_rate ^ previous_attempt_number
+    This decorator allows to log and to check 'retries' input param.
+    Time interval between retries is calculated in the following way:
+    interval * backoff_rate ^ previous_attempt_number
 
-    :param exception: expected exception type. When wrapped function
-                      raises an exception of this type,the function
-                      execution is retried.
-    :param interval: param 'interval' is used to calculate time interval
-                     between retries:
+    :param exception: expected exception type. When wrapped function
+                      raises an exception of this type, the function
+                      execution is retried.
+    :param interval: param 'interval' is used to calculate time interval
+                     between retries:
                      interval * backoff_rate ^ previous_attempt_number
-    :param retries: number of retries.
-    :param backoff_rate: param 'backoff_rate' is used to calculate time
-                         interval between retries:
-                         interval * backoff_rate ^ previous_attempt_number
+    :param retries: number of retries
+    :param backoff_rate: param 'backoff_rate' is used to calculate time
+                         interval between retries:
+                         interval * backoff_rate ^ previous_attempt_number
+    :param wait_random: boolean value to enable retry with random wait timer.
 
     """
    def _retry_on_exception(e):
@@ -565,8 +568,14 @@ def retry(exception, interval=1, retries=10, backoff_rate=2,
     def _backoff_sleep(previous_attempt_number, delay_since_first_attempt_ms):
         exp = backoff_rate ** previous_attempt_number
         wait_for = max(0, interval * exp)
-        LOG.debug("Sleeping for %s seconds", wait_for)
-        return wait_for * 1000.0
+
+        if wait_random:
+            wait_val = random.randrange(interval * 1000.0, wait_for * 1000.0)
+        else:
+            wait_val = wait_for * 1000.0
+
+        LOG.debug("Sleeping for %s seconds.", (wait_val / 1000.0))
+        return wait_val
 
     def _print_stop(previous_attempt_number, delay_since_first_attempt_ms):
         delay_since_first_attempt = delay_since_first_attempt_ms / 1000.0
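To make the new wait calculation concrete, here is a small standalone sketch of the same arithmetic (the helper name below is made up; only the formula mirrors the patched _backoff_sleep()): with interval=1 and backoff_rate=2, the fixed path sleeps 2, 4, 8, ... seconds on successive attempts, so callers that failed together keep retrying together, while the random path draws each wait between the base interval and that exponential bound.

    import random


    def backoff_ms(previous_attempt_number, interval=1, backoff_rate=2,
                   wait_random=False):
        # Same formula as the patched _backoff_sleep(): exponential upper bound
        # in seconds, returned as milliseconds for the retrying library.
        wait_for = max(0, interval * backoff_rate ** previous_attempt_number)
        if wait_random:
            # Randomized wait: any value between the base interval and the bound.
            return random.randrange(int(interval * 1000), int(wait_for * 1000))
        # Fixed wait: always exactly the exponential bound.
        return wait_for * 1000.0


    # Third failed attempt with the defaults:
    # fixed wait -> 8000.0 ms; randomized wait -> somewhere in [1000, 8000) ms.
    print(backoff_ms(3))
    print(backoff_ms(3, wait_random=True))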