Merge "Retry port lists on failure in PXE filter periodic sync"

This commit is contained in:
Zuul 2018-02-15 00:40:57 +00:00 committed by Gerrit Code Review
commit 3b9e4c0656
8 changed files with 115 additions and 5 deletions

View File

@ -17,6 +17,7 @@ from ironicclient import client
from ironicclient import exceptions as ironic_exc
import netaddr
from oslo_config import cfg
import retrying
from ironic_inspector.common.i18n import _
from ironic_inspector.common import keystone
@ -162,3 +163,16 @@ def get_node(node_id, ironic=None, **kwargs):
except ironic_exc.HttpError as exc:
raise utils.Error(_("Cannot get node %(node)s: %(exc)s") %
{'node': node_id, 'exc': exc})
@retrying.retry(
retry_on_exception=lambda exc: isinstance(exc, ironic_exc.ClientException),
stop_max_attempt_number=5, wait_fixed=1000)
def call_with_retries(func, *args, **kwargs):
"""Call an ironic client function retrying all errors.
If an ironic client exception is raised, try calling the func again,
at most 5 times, waiting 1 sec between each call. If on the 5th attempt
the func raises again, the exception is propagated to the caller.
"""
return func(*args, **kwargs)

View File

@ -66,7 +66,8 @@ class DnsmasqFilter(pxe_filter.BaseFilter):
timestamp_start = timeutils.utcnow()
active_macs = node_cache.active_macs()
ironic_macs = set(port.address for port in
ironic.port.list(limit=0, fields=['address']))
ir_utils.call_with_retries(ironic.port.list, limit=0,
fields=['address']))
blacklist_macs = _get_blacklist()
# NOTE(milan) whitelist MACs of ports not kept in ironic anymore
# also whitelist active MACs that are still blacklisted in the

View File

@ -19,6 +19,7 @@ from eventlet.green import subprocess
from oslo_config import cfg
from oslo_log import log
from ironic_inspector.common import ironic as ir_utils
from ironic_inspector import node_cache
from ironic_inspector.pxe_filter import base as pxe_filter
@ -225,9 +226,9 @@ def _ib_mac_to_rmac_mapping(ports):
def _get_blacklist(ironic):
ports = [
port.address for port in ironic.port.list(
limit=0, fields=['address', 'extra'])
if port.address not in node_cache.active_macs()]
ports = [port.address for port in
ir_utils.call_with_retries(ironic.port.list, limit=0,
fields=['address', 'extra'])
if port.address not in node_cache.active_macs()]
_ib_mac_to_rmac_mapping(ports)
return ports

View File

@ -15,6 +15,7 @@ import socket
import unittest
from ironicclient import client
from ironicclient import exc as ironic_exc
import mock
from oslo_config import cfg
@ -144,3 +145,41 @@ class TestCapabilities(unittest.TestCase):
output = ir_utils.dict_to_capabilities(capabilities_dict)
self.assertIn('cat:meow', output)
self.assertIn('dog:wuff', output)
class TestCallWithRetries(unittest.TestCase):
def setUp(self):
super(TestCallWithRetries, self).setUp()
self.call = mock.Mock(spec=[])
def test_no_retries_on_success(self):
result = ir_utils.call_with_retries(self.call, 'meow', answer=42)
self.assertEqual(result, self.call.return_value)
self.call.assert_called_once_with('meow', answer=42)
def test_no_retries_on_python_error(self):
self.call.side_effect = RuntimeError('boom')
self.assertRaisesRegexp(RuntimeError, 'boom',
ir_utils.call_with_retries,
self.call, 'meow', answer=42)
self.call.assert_called_once_with('meow', answer=42)
@mock.patch('time.sleep', lambda _x: None)
def test_retries_on_ironicclient_error(self):
self.call.side_effect = [
ironic_exc.ClientException('boom')
] * 3 + [mock.sentinel.result]
result = ir_utils.call_with_retries(self.call, 'meow', answer=42)
self.assertEqual(result, mock.sentinel.result)
self.call.assert_called_with('meow', answer=42)
self.assertEqual(4, self.call.call_count)
@mock.patch('time.sleep', lambda _x: None)
def test_retries_on_ironicclient_error_with_failure(self):
self.call.side_effect = ironic_exc.ClientException('boom')
self.assertRaisesRegexp(ironic_exc.ClientException, 'boom',
ir_utils.call_with_retries,
self.call, 'meow', answer=42)
self.call.assert_called_with('meow', answer=42)
self.assertEqual(5, self.call.call_count)

View File

@ -15,6 +15,7 @@ import datetime
import os
import fixtures
from ironicclient import exc as ironic_exc
import mock
from oslo_config import cfg
import six
@ -299,6 +300,29 @@ class TestSync(DnsmasqTestBase):
self.timestamp_end - self.timestamp_start)
])
@mock.patch('time.sleep', lambda _x: None)
def test__sync_with_port_list_retries(self):
self.mock_ironic.port.list.side_effect = [
ironic_exc.ConnectionRefused('boom'),
[mock.Mock(address=address) for address in self.ironic_macs]
]
self.driver._sync(self.mock_ironic)
self.mock__whitelist_mac.assert_has_calls([mock.call('active_mac'),
mock.call('gone_mac')],
any_order=True)
self.mock__blacklist_mac.assert_has_calls([mock.call('new_mac')],
any_order=True)
self.mock_ironic.port.list.assert_called_with(limit=0,
fields=['address'])
self.mock_active_macs.assert_called_once_with()
self.mock__get_blacklist.assert_called_once_with()
self.mock_log.debug.assert_has_calls([
mock.call('Syncing the driver'),
mock.call('The dnsmasq PXE filter was synchronized (took %s)',
self.timestamp_end - self.timestamp_start)
])
class Test_Execute(test_base.BaseTest):
def setUp(self):

View File

@ -14,6 +14,7 @@
# under the License.
import fixtures
from ironicclient import exc as ironic_exc
import mock
from oslo_config import cfg
@ -354,3 +355,21 @@ class TestGetBlacklist(test_base.BaseTest):
self.mock_ironic.port.list.assert_called_once_with(
limit=0, fields=['address', 'extra'])
self.mock__ib_mac_to_rmac_mapping.assert_called_once_with(ports)
@mock.patch('time.sleep', lambda _x: None)
def test_retry_on_port_list_failure(self):
self.mock_ironic.port.list.side_effect = [
ironic_exc.ConnectionRefused('boom'),
[
mock.Mock(address='foo'),
mock.Mock(address='bar'),
]
]
self.mock_active_macs.return_value = {'foo'}
ports = iptables._get_blacklist(self.mock_ironic)
# foo is an active address so we expect the blacklist contains only bar
self.assertEqual(['bar'], ports)
self.mock_ironic.port.list.assert_called_with(
limit=0, fields=['address', 'extra'])
self.mock__ib_mac_to_rmac_mapping.assert_called_once_with(ports)

View File

@ -0,0 +1,11 @@
---
fixes:
- |
The periodic PXE filter update task now retries fetching port list from
the Bare Metal service 5 times (with 1 second delay) before giving up.
This ensures that a temporary networking glitch will not result in
the ironic-inspector service stopping.
upgrade:
- |
Adds dependency on the `retrying <https://github.com/rholder/retrying>`_
python library.

View File

@ -29,6 +29,7 @@ oslo.policy>=1.30.0 # Apache-2.0
oslo.rootwrap>=5.8.0 # Apache-2.0
oslo.serialization!=2.19.1,>=2.18.0 # Apache-2.0
oslo.utils>=3.33.0 # Apache-2.0
retrying!=1.3.0,>=1.2.3 # Apache-2.0
six>=1.10.0 # MIT
stevedore>=1.20.0 # Apache-2.0
SQLAlchemy!=1.1.5,!=1.1.6,!=1.1.7,!=1.1.8,>=1.0.10 # MIT