From 16bbe0e3a736a3f058e1ab67139e50e985ea8a2c Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Thu, 11 Dec 2014 17:44:16 +0100 Subject: [PATCH] Retry on Conflict exceptions from Ironic Retried is everything, except get and list calls and port creation (for which Conflict is an expected exception). Unit tests are added for the most critical cases. Change-Id: I92675c3d72a2f598c4e0d1ae8b0a4a4aa0a2ed12 Closes-Bug: #1401222 --- ironic_discoverd/discover.py | 5 +++-- ironic_discoverd/process.py | 11 ++++----- ironic_discoverd/test/test_discover.py | 13 ++++++----- ironic_discoverd/test/test_main.py | 22 ++++++++++++++++++ ironic_discoverd/test/test_process.py | 31 +++++++++++++++++++++++++- ironic_discoverd/utils.py | 20 ++++++++++++++++- 6 files changed, 88 insertions(+), 14 deletions(-) diff --git a/ironic_discoverd/discover.py b/ironic_discoverd/discover.py index 7b6937ec8..feb1c0854 100644 --- a/ironic_discoverd/discover.py +++ b/ironic_discoverd/discover.py @@ -74,7 +74,7 @@ def _validate(ironic, node): (node.uuid, power_state)) if not node.extra.get('ipmi_setup_credentials'): - validation = ironic.node.validate(node.uuid) + validation = utils.retry_on_conflict(ironic.node.validate, node.uuid) if not validation.power['result']: LOG.error('Failed validation of power interface for node %s, ' 'reason: %s', node.uuid, validation.power['reason']) @@ -101,7 +101,8 @@ def _background_start_discover(ironic, node): if not node.extra.get('ipmi_setup_credentials'): try: - ironic.node.set_power_state(node.uuid, 'reboot') + utils.retry_on_conflict(ironic.node.set_power_state, + node.uuid, 'reboot') except Exception as exc: LOG.error('Failed to power on node %s, check it\'s power ' 'management configuration:\n%s', node.uuid, exc) diff --git a/ironic_discoverd/process.py b/ironic_discoverd/process.py index 344086a3b..3f9a49bad 100644 --- a/ironic_discoverd/process.py +++ b/ironic_discoverd/process.py @@ -93,9 +93,9 @@ def _process_node(ironic, node, node_info, cached_node): {'mac': mac, 'node': node.uuid}) node_patches, port_patches = _run_post_hooks(node, ports, node_info) - node = ironic.node.update(node.uuid, node_patches) + node = utils.retry_on_conflict(ironic.node.update, node.uuid, node_patches) for mac, patches in port_patches.items(): - ironic.port.update(ports[mac].uuid, patches) + utils.retry_on_conflict(ironic.port.update, ports[mac].uuid, patches) LOG.info('Node %s was updated with data from discovery process', node.uuid) @@ -116,7 +116,8 @@ def _wait_for_power_management(ironic, cached_node): deadline = cached_node.started_at + conf.getint('discoverd', 'timeout') while time.time() < deadline: eventlet.greenthread.sleep(_POWER_CHECK_PERIOD) - validation = ironic.node.validate(cached_node.uuid) + validation = utils.retry_on_conflict(ironic.node.validate, + cached_node.uuid) if validation.power['result']: _finish_discovery(ironic, cached_node) return @@ -131,7 +132,7 @@ def _wait_for_power_management(ironic, cached_node): def _force_power_off(ironic, node): LOG.debug('Forcing power off of node %s', node.uuid) try: - ironic.node.set_power_state(node.uuid, 'off') + utils.retry_on_conflict(ironic.node.set_power_state, node.uuid, 'off') except Exception as exc: LOG.error('Failed to power off node %s, check it\'s power ' 'management configuration:\n%s', node.uuid, exc) @@ -143,4 +144,4 @@ def _finish_discovery(ironic, node): patch = [{'op': 'add', 'path': '/extra/newly_discovered', 'value': 'true'}, {'op': 'remove', 'path': '/extra/on_discovery'}] - ironic.node.update(node.uuid, patch) + utils.retry_on_conflict(ironic.node.update, node.uuid, patch) diff --git a/ironic_discoverd/test/test_discover.py b/ironic_discoverd/test/test_discover.py index 34e96df6a..e43a4771b 100644 --- a/ironic_discoverd/test/test_discover.py +++ b/ironic_discoverd/test/test_discover.py @@ -24,6 +24,7 @@ from ironic_discoverd.test import base as test_base from ironic_discoverd import utils +@mock.patch.object(eventlet.greenthread, 'sleep', lambda _: None) @mock.patch.object(eventlet.greenthread, 'spawn_n', side_effect=lambda f, *a: f(*a) and None) @mock.patch.object(firewall, 'update_filters', autospec=True) @@ -70,9 +71,11 @@ class TestDiscover(test_base.BaseTest): ] cli.node.list_ports.side_effect = ports # Failure to powering on does not cause total failure - cli.node.set_power_state.side_effect = [None, - exceptions.Conflict(), - None] + cli.node.set_power_state.side_effect = [ + None, + exceptions.Conflict(), # this is just retried + exceptions.InternalServerError(), + None] discover.discover(['uuid1', 'uuid2', 'uuid3']) @@ -93,7 +96,7 @@ class TestDiscover(test_base.BaseTest): mac=[]) filters_mock.assert_called_with(cli) self.assertEqual(2, filters_mock.call_count) # 1 node w/o ports - self.assertEqual(3, cli.node.set_power_state.call_count) + self.assertEqual(4, cli.node.set_power_state.call_count) cli.node.set_power_state.assert_called_with(mock.ANY, 'reboot') patch = [{'op': 'add', 'path': '/extra/on_discovery', 'value': 'true'}, {'op': 'add', 'path': '/extra/discovery_timestamp', @@ -133,7 +136,7 @@ class TestDiscover(test_base.BaseTest): discover.discover, ['uuid1', 'uuid2']) cli.node.get.side_effect = [ - exceptions.Conflict(), + exceptions.BadRequest(), self.node1, ] self.assertRaisesRegexp(utils.DiscoveryFailed, diff --git a/ironic_discoverd/test/test_main.py b/ironic_discoverd/test/test_main.py index 862a3238f..dd5a15bb3 100644 --- a/ironic_discoverd/test/test_main.py +++ b/ironic_discoverd/test/test_main.py @@ -14,6 +14,7 @@ import unittest import eventlet +from ironicclient import exceptions from keystoneclient import exceptions as keystone_exc import mock @@ -128,3 +129,24 @@ class TestPlugins(unittest.TestCase): def test_manager_is_cached(self): self.assertIs(plugins_base.processing_hooks_manager(), plugins_base.processing_hooks_manager()) + + +@mock.patch.object(eventlet.greenthread, 'sleep', lambda _: None) +class TestUtils(unittest.TestCase): + def test_retry_on_conflict(self): + call = mock.Mock() + call.side_effect = ([exceptions.Conflict()] * (utils.RETRY_COUNT - 1) + + [mock.sentinel.result]) + res = utils.retry_on_conflict(call, 1, 2, x=3) + self.assertEqual(mock.sentinel.result, res) + call.assert_called_with(1, 2, x=3) + self.assertEqual(utils.RETRY_COUNT, call.call_count) + + def test_retry_on_conflict_fail(self): + call = mock.Mock() + call.side_effect = ([exceptions.Conflict()] * (utils.RETRY_COUNT + 1) + + [mock.sentinel.result]) + self.assertRaises(exceptions.Conflict, utils.retry_on_conflict, + call, 1, 2, x=3) + call.assert_called_with(1, 2, x=3) + self.assertEqual(utils.RETRY_COUNT, call.call_count) diff --git a/ironic_discoverd/test/test_process.py b/ironic_discoverd/test/test_process.py index f68defcc2..e8639d8ee 100644 --- a/ironic_discoverd/test/test_process.py +++ b/ironic_discoverd/test/test_process.py @@ -247,6 +247,35 @@ class TestProcessNode(BaseTest): self.assertEqual(self.ports, sorted(post_hook_mock.call_args[0][1], key=lambda p: p.address)) + def test_update_retry_on_conflict(self, filters_mock, post_hook_mock): + self.cli.node.update.side_effect = [exceptions.Conflict, self.node, + exceptions.Conflict, self.node] + + self.call() + + self.cli.port.create.assert_any_call(node_uuid=self.uuid, + address=self.macs[0]) + self.cli.port.create.assert_any_call(node_uuid=self.uuid, + address=self.macs[1]) + self.cli.node.update.assert_any_call(self.uuid, self.patch_before) + self.cli.node.update.assert_any_call(self.uuid, self.patch_after) + self.assertEqual(4, self.cli.node.update.call_count) + self.cli.node.set_power_state.assert_called_once_with(self.uuid, 'off') + + def test_power_off_retry_on_conflict(self, filters_mock, post_hook_mock): + self.cli.node.set_power_state.side_effect = [exceptions.Conflict, None] + + self.call() + + self.cli.port.create.assert_any_call(node_uuid=self.uuid, + address=self.macs[0]) + self.cli.port.create.assert_any_call(node_uuid=self.uuid, + address=self.macs[1]) + self.cli.node.update.assert_any_call(self.uuid, self.patch_before) + self.cli.node.update.assert_any_call(self.uuid, self.patch_after) + self.cli.node.set_power_state.assert_called_with(self.uuid, 'off') + self.assertEqual(2, self.cli.node.set_power_state.call_count) + def test_port_failed(self, filters_mock, post_hook_mock): self.ports[0] = exceptions.Conflict() @@ -300,7 +329,7 @@ class TestProcessNode(BaseTest): self.assertFalse(self.cli.node.set_power_state.called) def test_power_off_failed(self, filters_mock, post_hook_mock): - self.cli.node.set_power_state.side_effect = exceptions.Conflict() + self.cli.node.set_power_state.side_effect = exceptions.BadRequest() self.assertRaisesRegexp(utils.DiscoveryFailed, 'Failed to power off', self.call) diff --git a/ironic_discoverd/utils.py b/ironic_discoverd/utils.py index 3db375084..2d3d74611 100644 --- a/ironic_discoverd/utils.py +++ b/ironic_discoverd/utils.py @@ -14,15 +14,19 @@ import logging import re +import eventlet from ironicclient import client +from ironicclient import exceptions from keystoneclient.v2_0 import client as keystone import six from ironic_discoverd import conf -LOG = logging.getLogger('discoverd') +LOG = logging.getLogger('ironic_discoverd.utils') OS_ARGS = ('os_password', 'os_username', 'os_auth_url', 'os_tenant_name') +RETRY_COUNT = 10 +RETRY_DELAY = 2 class DiscoveryFailed(Exception): @@ -45,3 +49,17 @@ def is_valid_mac(address): m = "[0-9a-f]{2}(:[0-9a-f]{2}){5}$" return (isinstance(address, six.string_types) and re.match(m, address.lower())) + + +def retry_on_conflict(call, *args, **kwargs): + for i in range(RETRY_COUNT): + try: + return call(*args, **kwargs) + except exceptions.Conflict as exc: + LOG.warning('Conflict on calling %s: %s, retry attempt %d', + getattr(call, '__name__', repr(call)), exc, i) + if i == RETRY_COUNT - 1: + raise + eventlet.greenthread.sleep(RETRY_DELAY) + + raise RuntimeError('unreachable code') # pragma: no cover