Wait for at least one interface before node lookup

During node look up sometimes IPA sends hardware inventory information before
the interfaces are up, resulting in empty interfaces information being sent
to the conductor. This causes conductor to throw exceptions and polluting
the log until the real interfaces information is populated. This change makes
IPA to wait for at least one interface to be up before trying for node lookup.

Change-Id: Ifdb91298eaa5c725f108fa722263ed925691ecda
Closes-Bug: #1562786
This commit is contained in:
vmud213 2016-04-08 15:13:23 +00:00
parent 9965b449d6
commit 09db71d640
3 changed files with 71 additions and 4 deletions

View File

@ -38,6 +38,13 @@ from ironic_python_agent import utils
LOG = log.getLogger(__name__)
# Time(in seconds) to wait for any of the interfaces to be up
# before lookup of the node is attempted
NETWORK_WAIT_TIMEOUT = 60
# Time(in seconds) to wait before reattempt
NETWORK_WAIT_RETRY = 5
def _time():
"""Wraps time.time() for simpler testing."""
@ -283,6 +290,24 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
if not self.standalone:
self.heartbeater.force_heartbeat()
def _wait_for_interface(self):
"""Wait until at least one interface is up."""
wait_till = time.time() + NETWORK_WAIT_TIMEOUT
while time.time() < wait_till:
interfaces = hardware.dispatch_to_managers(
'list_network_interfaces')
if not any(ifc.mac_address for ifc in interfaces):
LOG.debug('Network is not up yet. '
'No valid interfaces found, retrying ...')
time.sleep(NETWORK_WAIT_RETRY)
else:
break
else:
LOG.warning("No valid network interfaces found. "
"Node lookup will probably fail.")
def run(self):
"""Run the Ironic Python Agent."""
# Get the UUID so we can heartbeat to Ironic. Raises LookupNodeError
@ -303,6 +328,7 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
# lookup will fail due to unknown MAC.
uuid = inspector.inspect()
self._wait_for_interface()
content = self.api_client.lookup_node(
hardware_info=hardware.dispatch_to_managers(
'list_hardware_info'),

View File

@ -150,6 +150,8 @@ class TestBaseAgent(test_base.BaseTestCase):
make_test_instance([extension.Extension('fake', None,
FakeExtension,
FakeExtension())])
self.sample_nw_iface = hardware.NetworkInterface(
"eth9", "AA:BB:CC:DD:EE:FF", "1.2.3.4", True)
def assertEqualEncoded(self, a, b):
# Evidently JSONEncoder.default() can't handle None (??) so we have to
@ -169,9 +171,11 @@ class TestBaseAgent(test_base.BaseTestCase):
self.assertEqual(pkg_resources.get_distribution('ironic-python-agent')
.version, status.version)
@mock.patch.object(agent.IronicPythonAgent,
'_wait_for_interface')
@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True)
@mock.patch('wsgiref.simple_server.make_server', autospec=True)
@mock.patch.object(hardware.HardwareManager, 'list_hardware_info')
def test_run(self, mocked_list_hardware, wsgi_server_cls):
def test_run(self, wsgi_server_cls, mocked_dispatch, mocked_wait):
CONF.set_override('inspection_callback_url', '', enforce_type=True)
wsgi_server = wsgi_server_cls.return_value
wsgi_server.start.side_effect = KeyboardInterrupt()
@ -193,14 +197,19 @@ class TestBaseAgent(test_base.BaseTestCase):
self.agent.api,
server_class=simple_server.WSGIServer)
wsgi_server.serve_forever.assert_called_once_with()
mocked_wait.assert_called_once_with()
mocked_dispatch.assert_called_once_with("list_hardware_info")
self.agent.heartbeater.start.assert_called_once_with()
@mock.patch.object(agent.IronicPythonAgent,
'_wait_for_interface')
@mock.patch.object(inspector, 'inspect', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True)
@mock.patch('wsgiref.simple_server.make_server', autospec=True)
@mock.patch.object(hardware.HardwareManager, 'list_hardware_info')
def test_run_with_inspection(self, mocked_list_hardware, wsgi_server_cls,
mocked_inspector):
mocked_dispatch, mocked_inspector,
mocked_wait):
CONF.set_override('inspection_callback_url', 'http://foo/bar',
enforce_type=True)
@ -232,8 +241,34 @@ class TestBaseAgent(test_base.BaseTestCase):
'uuid',
self.agent.api_client.lookup_node.call_args[1]['node_uuid'])
mocked_wait.assert_called_once_with()
mocked_dispatch.assert_called_once_with("list_hardware_info")
self.agent.heartbeater.start.assert_called_once_with()
@mock.patch.object(time, 'time', autospec=True)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True)
def test__wait_for_interface(self, mocked_dispatch, mocked_sleep,
mock_time):
mocked_dispatch.return_value = [self.sample_nw_iface, {}]
mock_time.return_value = 10
self.agent._wait_for_interface()
mocked_dispatch.assert_called_once_with('list_network_interfaces')
self.assertFalse(mocked_sleep.called)
@mock.patch.object(time, 'time', autospec=True)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True)
def test__wait_for_interface_expired(self, mocked_dispatch, mocked_sleep,
mock_time):
mock_time.side_effect = [10, 11, 20, 25, 30]
mocked_dispatch.side_effect = [[], [], [self.sample_nw_iface], {}]
expected_sleep_calls = [mock.call(agent.NETWORK_WAIT_RETRY)] * 2
expected_dispatch_calls = [mock.call("list_network_interfaces")] * 3
self.agent._wait_for_interface()
mocked_dispatch.assert_has_calls(expected_dispatch_calls)
mocked_sleep.assert_has_calls(expected_sleep_calls)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch('wsgiref.simple_server.make_server', autospec=True)
@mock.patch.object(hardware.HardwareManager, 'list_hardware_info')

View File

@ -0,0 +1,6 @@
---
fixes:
- Ensures the node look up is attempted with valid interfaces information
to avoid exception log messages in the conductor log. IPA tries to
retrieve interfaces information every 5 seconds until either at least
one of the interfaces is up or 60 seconds is lapsed.