Retry on Conflict exceptions from Ironic
Retried is everything, except get and list calls and port creation (for which Conflict is an expected exception). Unit tests are added for the most critical cases. Change-Id: I92675c3d72a2f598c4e0d1ae8b0a4a4aa0a2ed12 Closes-Bug: #1401222
This commit is contained in:
parent
43b0769703
commit
16bbe0e3a7
|
@ -74,7 +74,7 @@ def _validate(ironic, node):
|
|||
(node.uuid, power_state))
|
||||
|
||||
if not node.extra.get('ipmi_setup_credentials'):
|
||||
validation = ironic.node.validate(node.uuid)
|
||||
validation = utils.retry_on_conflict(ironic.node.validate, node.uuid)
|
||||
if not validation.power['result']:
|
||||
LOG.error('Failed validation of power interface for node %s, '
|
||||
'reason: %s', node.uuid, validation.power['reason'])
|
||||
|
@ -101,7 +101,8 @@ def _background_start_discover(ironic, node):
|
|||
|
||||
if not node.extra.get('ipmi_setup_credentials'):
|
||||
try:
|
||||
ironic.node.set_power_state(node.uuid, 'reboot')
|
||||
utils.retry_on_conflict(ironic.node.set_power_state,
|
||||
node.uuid, 'reboot')
|
||||
except Exception as exc:
|
||||
LOG.error('Failed to power on node %s, check it\'s power '
|
||||
'management configuration:\n%s', node.uuid, exc)
|
||||
|
|
|
@ -93,9 +93,9 @@ def _process_node(ironic, node, node_info, cached_node):
|
|||
{'mac': mac, 'node': node.uuid})
|
||||
|
||||
node_patches, port_patches = _run_post_hooks(node, ports, node_info)
|
||||
node = ironic.node.update(node.uuid, node_patches)
|
||||
node = utils.retry_on_conflict(ironic.node.update, node.uuid, node_patches)
|
||||
for mac, patches in port_patches.items():
|
||||
ironic.port.update(ports[mac].uuid, patches)
|
||||
utils.retry_on_conflict(ironic.port.update, ports[mac].uuid, patches)
|
||||
|
||||
LOG.info('Node %s was updated with data from discovery process', node.uuid)
|
||||
|
||||
|
@ -116,7 +116,8 @@ def _wait_for_power_management(ironic, cached_node):
|
|||
deadline = cached_node.started_at + conf.getint('discoverd', 'timeout')
|
||||
while time.time() < deadline:
|
||||
eventlet.greenthread.sleep(_POWER_CHECK_PERIOD)
|
||||
validation = ironic.node.validate(cached_node.uuid)
|
||||
validation = utils.retry_on_conflict(ironic.node.validate,
|
||||
cached_node.uuid)
|
||||
if validation.power['result']:
|
||||
_finish_discovery(ironic, cached_node)
|
||||
return
|
||||
|
@ -131,7 +132,7 @@ def _wait_for_power_management(ironic, cached_node):
|
|||
def _force_power_off(ironic, node):
|
||||
LOG.debug('Forcing power off of node %s', node.uuid)
|
||||
try:
|
||||
ironic.node.set_power_state(node.uuid, 'off')
|
||||
utils.retry_on_conflict(ironic.node.set_power_state, node.uuid, 'off')
|
||||
except Exception as exc:
|
||||
LOG.error('Failed to power off node %s, check it\'s power '
|
||||
'management configuration:\n%s', node.uuid, exc)
|
||||
|
@ -143,4 +144,4 @@ def _finish_discovery(ironic, node):
|
|||
|
||||
patch = [{'op': 'add', 'path': '/extra/newly_discovered', 'value': 'true'},
|
||||
{'op': 'remove', 'path': '/extra/on_discovery'}]
|
||||
ironic.node.update(node.uuid, patch)
|
||||
utils.retry_on_conflict(ironic.node.update, node.uuid, patch)
|
||||
|
|
|
@ -24,6 +24,7 @@ from ironic_discoverd.test import base as test_base
|
|||
from ironic_discoverd import utils
|
||||
|
||||
|
||||
@mock.patch.object(eventlet.greenthread, 'sleep', lambda _: None)
|
||||
@mock.patch.object(eventlet.greenthread, 'spawn_n',
|
||||
side_effect=lambda f, *a: f(*a) and None)
|
||||
@mock.patch.object(firewall, 'update_filters', autospec=True)
|
||||
|
@ -70,9 +71,11 @@ class TestDiscover(test_base.BaseTest):
|
|||
]
|
||||
cli.node.list_ports.side_effect = ports
|
||||
# Failure to powering on does not cause total failure
|
||||
cli.node.set_power_state.side_effect = [None,
|
||||
exceptions.Conflict(),
|
||||
None]
|
||||
cli.node.set_power_state.side_effect = [
|
||||
None,
|
||||
exceptions.Conflict(), # this is just retried
|
||||
exceptions.InternalServerError(),
|
||||
None]
|
||||
|
||||
discover.discover(['uuid1', 'uuid2', 'uuid3'])
|
||||
|
||||
|
@ -93,7 +96,7 @@ class TestDiscover(test_base.BaseTest):
|
|||
mac=[])
|
||||
filters_mock.assert_called_with(cli)
|
||||
self.assertEqual(2, filters_mock.call_count) # 1 node w/o ports
|
||||
self.assertEqual(3, cli.node.set_power_state.call_count)
|
||||
self.assertEqual(4, cli.node.set_power_state.call_count)
|
||||
cli.node.set_power_state.assert_called_with(mock.ANY, 'reboot')
|
||||
patch = [{'op': 'add', 'path': '/extra/on_discovery', 'value': 'true'},
|
||||
{'op': 'add', 'path': '/extra/discovery_timestamp',
|
||||
|
@ -133,7 +136,7 @@ class TestDiscover(test_base.BaseTest):
|
|||
discover.discover, ['uuid1', 'uuid2'])
|
||||
|
||||
cli.node.get.side_effect = [
|
||||
exceptions.Conflict(),
|
||||
exceptions.BadRequest(),
|
||||
self.node1,
|
||||
]
|
||||
self.assertRaisesRegexp(utils.DiscoveryFailed,
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
import unittest
|
||||
|
||||
import eventlet
|
||||
from ironicclient import exceptions
|
||||
from keystoneclient import exceptions as keystone_exc
|
||||
import mock
|
||||
|
||||
|
@ -128,3 +129,24 @@ class TestPlugins(unittest.TestCase):
|
|||
def test_manager_is_cached(self):
|
||||
self.assertIs(plugins_base.processing_hooks_manager(),
|
||||
plugins_base.processing_hooks_manager())
|
||||
|
||||
|
||||
@mock.patch.object(eventlet.greenthread, 'sleep', lambda _: None)
|
||||
class TestUtils(unittest.TestCase):
|
||||
def test_retry_on_conflict(self):
|
||||
call = mock.Mock()
|
||||
call.side_effect = ([exceptions.Conflict()] * (utils.RETRY_COUNT - 1)
|
||||
+ [mock.sentinel.result])
|
||||
res = utils.retry_on_conflict(call, 1, 2, x=3)
|
||||
self.assertEqual(mock.sentinel.result, res)
|
||||
call.assert_called_with(1, 2, x=3)
|
||||
self.assertEqual(utils.RETRY_COUNT, call.call_count)
|
||||
|
||||
def test_retry_on_conflict_fail(self):
|
||||
call = mock.Mock()
|
||||
call.side_effect = ([exceptions.Conflict()] * (utils.RETRY_COUNT + 1)
|
||||
+ [mock.sentinel.result])
|
||||
self.assertRaises(exceptions.Conflict, utils.retry_on_conflict,
|
||||
call, 1, 2, x=3)
|
||||
call.assert_called_with(1, 2, x=3)
|
||||
self.assertEqual(utils.RETRY_COUNT, call.call_count)
|
||||
|
|
|
@ -247,6 +247,35 @@ class TestProcessNode(BaseTest):
|
|||
self.assertEqual(self.ports, sorted(post_hook_mock.call_args[0][1],
|
||||
key=lambda p: p.address))
|
||||
|
||||
def test_update_retry_on_conflict(self, filters_mock, post_hook_mock):
|
||||
self.cli.node.update.side_effect = [exceptions.Conflict, self.node,
|
||||
exceptions.Conflict, self.node]
|
||||
|
||||
self.call()
|
||||
|
||||
self.cli.port.create.assert_any_call(node_uuid=self.uuid,
|
||||
address=self.macs[0])
|
||||
self.cli.port.create.assert_any_call(node_uuid=self.uuid,
|
||||
address=self.macs[1])
|
||||
self.cli.node.update.assert_any_call(self.uuid, self.patch_before)
|
||||
self.cli.node.update.assert_any_call(self.uuid, self.patch_after)
|
||||
self.assertEqual(4, self.cli.node.update.call_count)
|
||||
self.cli.node.set_power_state.assert_called_once_with(self.uuid, 'off')
|
||||
|
||||
def test_power_off_retry_on_conflict(self, filters_mock, post_hook_mock):
|
||||
self.cli.node.set_power_state.side_effect = [exceptions.Conflict, None]
|
||||
|
||||
self.call()
|
||||
|
||||
self.cli.port.create.assert_any_call(node_uuid=self.uuid,
|
||||
address=self.macs[0])
|
||||
self.cli.port.create.assert_any_call(node_uuid=self.uuid,
|
||||
address=self.macs[1])
|
||||
self.cli.node.update.assert_any_call(self.uuid, self.patch_before)
|
||||
self.cli.node.update.assert_any_call(self.uuid, self.patch_after)
|
||||
self.cli.node.set_power_state.assert_called_with(self.uuid, 'off')
|
||||
self.assertEqual(2, self.cli.node.set_power_state.call_count)
|
||||
|
||||
def test_port_failed(self, filters_mock, post_hook_mock):
|
||||
self.ports[0] = exceptions.Conflict()
|
||||
|
||||
|
@ -300,7 +329,7 @@ class TestProcessNode(BaseTest):
|
|||
self.assertFalse(self.cli.node.set_power_state.called)
|
||||
|
||||
def test_power_off_failed(self, filters_mock, post_hook_mock):
|
||||
self.cli.node.set_power_state.side_effect = exceptions.Conflict()
|
||||
self.cli.node.set_power_state.side_effect = exceptions.BadRequest()
|
||||
|
||||
self.assertRaisesRegexp(utils.DiscoveryFailed, 'Failed to power off',
|
||||
self.call)
|
||||
|
|
|
@ -14,15 +14,19 @@
|
|||
import logging
|
||||
import re
|
||||
|
||||
import eventlet
|
||||
from ironicclient import client
|
||||
from ironicclient import exceptions
|
||||
from keystoneclient.v2_0 import client as keystone
|
||||
import six
|
||||
|
||||
from ironic_discoverd import conf
|
||||
|
||||
|
||||
LOG = logging.getLogger('discoverd')
|
||||
LOG = logging.getLogger('ironic_discoverd.utils')
|
||||
OS_ARGS = ('os_password', 'os_username', 'os_auth_url', 'os_tenant_name')
|
||||
RETRY_COUNT = 10
|
||||
RETRY_DELAY = 2
|
||||
|
||||
|
||||
class DiscoveryFailed(Exception):
|
||||
|
@ -45,3 +49,17 @@ def is_valid_mac(address):
|
|||
m = "[0-9a-f]{2}(:[0-9a-f]{2}){5}$"
|
||||
return (isinstance(address, six.string_types)
|
||||
and re.match(m, address.lower()))
|
||||
|
||||
|
||||
def retry_on_conflict(call, *args, **kwargs):
|
||||
for i in range(RETRY_COUNT):
|
||||
try:
|
||||
return call(*args, **kwargs)
|
||||
except exceptions.Conflict as exc:
|
||||
LOG.warning('Conflict on calling %s: %s, retry attempt %d',
|
||||
getattr(call, '__name__', repr(call)), exc, i)
|
||||
if i == RETRY_COUNT - 1:
|
||||
raise
|
||||
eventlet.greenthread.sleep(RETRY_DELAY)
|
||||
|
||||
raise RuntimeError('unreachable code') # pragma: no cover
|
||||
|
|
Loading…
Reference in New Issue