Merge "Wait for the interfaces to get IP addresses before inspection"

This commit is contained in:
Jenkins
2016-04-06 17:29:36 +00:00
committed by Gerrit Code Review
7 changed files with 163 additions and 10 deletions

View File

@@ -114,6 +114,13 @@ cli_opts = [
help='Comma-separated list of plugins providing additional '
'hardware data for inspection, empty value gives '
'a minimum required set of plugins.'),
cfg.IntOpt('inspection_dhcp_wait_timeout',
default=APARAMS.get('ipa-inspection-dhcp-wait-timeout',
inspector.DEFAULT_DHCP_WAIT_TIMEOUT),
help='Maximum time (in seconds) to wait for all NIC\'s '
'to get their IP addresses via DHCP before inspection. '
'Set to 0 to disable waiting completely.'),
]
CONF.register_cli_opts(cli_opts)

View File

@@ -175,12 +175,14 @@ class BlockDevice(encoding.SerializableComparable):
class NetworkInterface(encoding.SerializableComparable):
serializable_fields = ('name', 'mac_address', 'switch_port_descr',
'switch_chassis_descr', 'ipv4_address')
'switch_chassis_descr', 'ipv4_address',
'has_carrier')
def __init__(self, name, mac_addr, ipv4_address=None):
def __init__(self, name, mac_addr, ipv4_address=None, has_carrier=True):
self.name = name
self.mac_address = mac_addr
self.ipv4_address = ipv4_address
self.has_carrier = has_carrier
# TODO(russellhaering): Pull these from LLDP
self.switch_port_descr = None
self.switch_chassis_descr = None
@@ -402,7 +404,8 @@ class GenericHardwareManager(HardwareManager):
return NetworkInterface(
interface_name, mac_addr,
ipv4_address=self.get_ipv4_addr(interface_name))
ipv4_address=self.get_ipv4_addr(interface_name),
has_carrier=self._interface_has_carrier(interface_name))
def get_ipv4_addr(self, interface_id):
try:
@@ -412,6 +415,17 @@ class GenericHardwareManager(HardwareManager):
# No default IPv4 address found
return None
def _interface_has_carrier(self, interface_name):
path = '{0}/class/net/{1}/carrier'.format(self.sys_path,
interface_name)
try:
with open(path, 'rt') as fp:
return fp.read().strip() == '1'
except EnvironmentError:
LOG.debug('No carrier information for interface %s',
interface_name)
return False
def _is_device(self, interface_name):
device_path = '{0}/class/net/{1}/device'.format(self.sys_path,
interface_name)

View File

@@ -17,6 +17,7 @@ import base64
import io
import json
import tarfile
import time
import netaddr
from oslo_concurrency import processutils
@@ -36,6 +37,9 @@ from ironic_python_agent import utils
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
DEFAULT_COLLECTOR = 'default'
DEFAULT_DHCP_WAIT_TIMEOUT = 60
_DHCP_RETRY_INTERVAL = 2
_COLLECTOR_NS = 'ironic_python_agent.inspector.collectors'
_NO_LOGGING_FIELDS = ('logs',)
@@ -215,6 +219,38 @@ def discover_scheduling_properties(inventory, data, root_disk=None):
LOG.info('value for %s is %s', key, data[key])
def wait_for_dhcp():
"""Wait until all NIC's get their IP addresses via DHCP or timeout happens.
Ignores interfaces which do not even have a carrier.
Note: only supports IPv4 addresses for now.
:return: True if all NIC's got IP addresses, False if timeout happened.
Also returns True if waiting is disabled via configuration.
"""
if not CONF.inspection_dhcp_wait_timeout:
return True
threshold = time.time() + CONF.inspection_dhcp_wait_timeout
while time.time() <= threshold:
interfaces = hardware.dispatch_to_managers('list_network_interfaces')
missing = [iface.name for iface in interfaces
if iface.has_carrier and not iface.ipv4_address]
if not missing:
return True
LOG.debug('Still waiting for interfaces %s to get IP addresses',
missing)
time.sleep(_DHCP_RETRY_INTERVAL)
LOG.warning('Not all network interfaces received IP addresses in '
'%(timeout)d seconds: %(missing)s',
{'timeout': CONF.inspection_dhcp_wait_timeout,
'missing': missing})
return False
def collect_default(data, failures):
"""The default inspection collector.
@@ -229,6 +265,7 @@ def collect_default(data, failures):
:param data: mutable data that we'll send to inspector
:param failures: AccumulatedFailures object
"""
wait_for_dhcp()
inventory = hardware.dispatch_to_managers('list_hardware_info')
# In the future we will only need the current version of inventory,

View File

@@ -268,7 +268,7 @@ class TestGenericHardwareManager(test_base.BaseTestCase):
mocked_open.return_value.__enter__ = lambda s: s
mocked_open.return_value.__exit__ = mock.Mock()
read_mock = mocked_open.return_value.read
read_mock.return_value = '00:0c:29:8c:11:b1\n'
read_mock.side_effect = ['00:0c:29:8c:11:b1\n', '1']
mocked_ifaddresses.return_value = {
netifaces.AF_INET: [{'addr': '192.168.1.2'}]
}
@@ -277,6 +277,32 @@ class TestGenericHardwareManager(test_base.BaseTestCase):
self.assertEqual('eth0', interfaces[0].name)
self.assertEqual('00:0c:29:8c:11:b1', interfaces[0].mac_address)
self.assertEqual('192.168.1.2', interfaces[0].ipv4_address)
self.assertTrue(interfaces[0].has_carrier)
@mock.patch('netifaces.ifaddresses')
@mock.patch('os.listdir')
@mock.patch('os.path.exists')
@mock.patch('six.moves.builtins.open')
def test_list_network_interfaces_no_carrier(self,
mocked_open,
mocked_exists,
mocked_listdir,
mocked_ifaddresses):
mocked_listdir.return_value = ['lo', 'eth0']
mocked_exists.side_effect = [False, True]
mocked_open.return_value.__enter__ = lambda s: s
mocked_open.return_value.__exit__ = mock.Mock()
read_mock = mocked_open.return_value.read
read_mock.side_effect = ['00:0c:29:8c:11:b1\n', OSError('boom')]
mocked_ifaddresses.return_value = {
netifaces.AF_INET: [{'addr': '192.168.1.2'}]
}
interfaces = self.hardware.list_network_interfaces()
self.assertEqual(1, len(interfaces))
self.assertEqual('eth0', interfaces[0].name)
self.assertEqual('00:0c:29:8c:11:b1', interfaces[0].mac_address)
self.assertEqual('192.168.1.2', interfaces[0].ipv4_address)
self.assertFalse(interfaces[0].has_carrier)
@mock.patch.object(utils, 'execute')
def test_get_os_install_device(self, mocked_execute):

View File

@@ -18,6 +18,7 @@ import collections
import copy
import io
import tarfile
import time
import unittest
import mock
@@ -328,11 +329,13 @@ class TestDiscoverSchedulingProperties(BaseDiscoverTest):
@mock.patch.object(utils, 'get_agent_params',
lambda: {'BOOTIF': 'boot:if'})
@mock.patch.object(inspector, 'wait_for_dhcp', autospec=True)
@mock.patch.object(inspector, 'discover_scheduling_properties', autospec=True)
@mock.patch.object(inspector, 'discover_network_properties', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True)
class TestCollectDefault(BaseDiscoverTest):
def test_ok(self, mock_dispatch, mock_discover_net, mock_discover_sched):
def test_ok(self, mock_dispatch, mock_discover_net, mock_discover_sched,
mock_wait_for_dhcp):
mock_dispatch.return_value = self.inventory
inspector.collect_default(self.data, self.failures)
@@ -351,9 +354,10 @@ class TestCollectDefault(BaseDiscoverTest):
mock_discover_sched.assert_called_once_with(
self.inventory, self.data,
root_disk=self.inventory['disks'][0])
mock_wait_for_dhcp.assert_called_once_with()
def test_no_root_disk(self, mock_dispatch, mock_discover_net,
mock_discover_sched):
mock_discover_sched, mock_wait_for_dhcp):
mock_dispatch.return_value = self.inventory
self.inventory['disks'] = []
@@ -371,6 +375,7 @@ class TestCollectDefault(BaseDiscoverTest):
self.failures)
mock_discover_sched.assert_called_once_with(
self.inventory, self.data, root_disk=None)
mock_wait_for_dhcp.assert_called_once_with()
@mock.patch.object(utils, 'execute', autospec=True)
@@ -453,3 +458,56 @@ class TestCollectExtraHardware(unittest.TestCase):
self.assertNotIn('data', self.data)
self.assertTrue(self.failures)
mock_execute.assert_called_once_with('hardware-detect')
@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True)
class TestWaitForDhcp(unittest.TestCase):
def setUp(self):
super(TestWaitForDhcp, self).setUp()
CONF.set_override('inspection_dhcp_wait_timeout',
inspector.DEFAULT_DHCP_WAIT_TIMEOUT)
@mock.patch.object(time, 'sleep', autospec=True)
def test_ok(self, mocked_sleep, mocked_dispatch):
mocked_dispatch.side_effect = [
[hardware.NetworkInterface(name='em0', mac_addr='abcd',
ipv4_address=None),
hardware.NetworkInterface(name='em1', mac_addr='abcd',
ipv4_address='1.2.3.4')],
[hardware.NetworkInterface(name='em0', mac_addr='abcd',
ipv4_address=None),
hardware.NetworkInterface(name='em1', mac_addr='abcd',
ipv4_address='1.2.3.4')],
[hardware.NetworkInterface(name='em0', mac_addr='abcd',
ipv4_address='1.1.1.1'),
hardware.NetworkInterface(name='em1', mac_addr='abcd',
ipv4_address='1.2.3.4')],
]
self.assertTrue(inspector.wait_for_dhcp())
mocked_dispatch.assert_called_with('list_network_interfaces')
self.assertEqual(2, mocked_sleep.call_count)
self.assertEqual(3, mocked_dispatch.call_count)
@mock.patch.object(inspector, '_DHCP_RETRY_INTERVAL', 0.01)
def test_timeout(self, mocked_dispatch):
CONF.set_override('inspection_dhcp_wait_timeout', 0.02)
mocked_dispatch.return_value = [
hardware.NetworkInterface(name='em0', mac_addr='abcd',
ipv4_address=None),
hardware.NetworkInterface(name='em1', mac_addr='abcd',
ipv4_address='1.2.3.4'),
]
self.assertFalse(inspector.wait_for_dhcp())
mocked_dispatch.assert_called_with('list_network_interfaces')
def test_disabled(self, mocked_dispatch):
CONF.set_override('inspection_dhcp_wait_timeout', 0)
self.assertTrue(inspector.wait_for_dhcp())
self.assertFalse(mocked_dispatch.called)

View File

@@ -160,14 +160,16 @@ class TestBaseIronicPythonAgent(test_base.BaseTestCase):
u'name': u'eth0',
u'ipv4_address': None,
u'switch_chassis_descr': None,
u'switch_port_descr': None
u'switch_port_descr': None,
u'has_carrier': True,
},
{
u'mac_address': u'00:0c:29:8c:11:b2',
u'name': u'eth1',
u'ipv4_address': None,
u'switch_chassis_descr': None,
'switch_port_descr': None
u'switch_port_descr': None,
u'has_carrier': True,
}
],
u'cpu': {
@@ -295,14 +297,16 @@ class TestBaseIronicPythonAgent(test_base.BaseTestCase):
u'name': u'eth0',
u'ipv4_address': None,
u'switch_chassis_descr': None,
u'switch_port_descr': None
u'switch_port_descr': None,
u'has_carrier': True,
},
{
u'mac_address': u'00:0c:29:8c:11:b2',
u'name': u'eth1',
u'ipv4_address': None,
u'switch_chassis_descr': None,
'switch_port_descr': None
u'switch_port_descr': None,
u'has_carrier': True,
}
],
u'cpu': {

View File

@@ -0,0 +1,7 @@
---
features:
- The "has_carrier" flag was added to the network interface information.
fixes:
- Inspection code now waits up to 1 minute for all NICs to get their IP addresses.
Otherwise inspection fails for some users due to race between DIB DHCP code
and IPA startup. See https://bugs.launchpad.net/bugs/1564954 for details.