[inspection] wait for the PXE DHCP by default and remove the carrier check

We hoped that checking /sys/class/net/XXX/carrier will allow us
to not wait for interfaces that are not connected at all.
In reality this field turned out to be unreliable. For example, it is
also set to 0 when interface is down or is being configured.
The bug https://bugzilla.redhat.com/show_bug.cgi?id=1327255 shows
the case when carrier is 0 for all interfaces, including one that is
used to post back data, which is obvious non-sense.

This change removes check on carrier for the loop. To avoid 60 seconds
wait for people with several NIC's, it's changed to only wait for the
PXE booting NIC, which obviously must get an IP address.

This makes IP addresses in the inspection data for other NIC's somewhat
unreliable. A new option inspection_dhcp_all_interfaces is introduced
to allow waiting for all NIC's to get IP addresses.

This change should finally fix bug 1564954.

Change-Id: I8b04bf726980fdcf6bd536c6bb28e30ac50658fb
Related-Bug: #1564954
This commit is contained in:
Dmitry Tantsur 2016-05-06 13:26:44 +02:00
parent 4be5e08408
commit 6da6ace384
4 changed files with 98 additions and 16 deletions

View File

@ -118,9 +118,17 @@ cli_opts = [
cfg.IntOpt('inspection_dhcp_wait_timeout',
default=APARAMS.get('ipa-inspection-dhcp-wait-timeout',
inspector.DEFAULT_DHCP_WAIT_TIMEOUT),
help='Maximum time (in seconds) to wait for all NIC\'s '
'to get their IP addresses via DHCP before inspection. '
help='Maximum time (in seconds) to wait for the PXE NIC '
'(or all NICs if inspection_dhcp_all_interfaces is True) '
'to get its IP address via DHCP before inspection. '
'Set to 0 to disable waiting completely.'),
cfg.BoolOpt('inspection_dhcp_all_interfaces',
default=APARAMS.get('ipa-inspection-dhcp-all-interfaces',
False),
help='Whether to wait for all interfaces to get their IP '
'addresses before inspection. If set to false '
'(the default), only waits for the PXE interface.'),
]
CONF.register_cli_opts(cli_opts)

View File

@ -219,10 +219,20 @@ def discover_scheduling_properties(inventory, data, root_disk=None):
LOG.info('value for %s is %s', key, data[key])
def wait_for_dhcp():
"""Wait until all NIC's get their IP addresses via DHCP or timeout happens.
def _normalize_mac(mac):
"""Convert MAC to a well-known format aa:bb:cc:dd:ee:ff."""
if '-' in mac:
# pxelinux format is 01-aa-bb-cc-dd-ee-ff
mac = mac.split('-', 1)[1]
mac = mac.replace('-', ':')
return mac.lower()
Ignores interfaces which do not even have a carrier.
def wait_for_dhcp():
"""Wait until NIC's get their IP addresses via DHCP or timeout happens.
Depending on the value of inspection_dhcp_all_interfaces configuration
option will wait for either all or only PXE booting NIC.
Note: only supports IPv4 addresses for now.
@ -232,11 +242,22 @@ def wait_for_dhcp():
if not CONF.inspection_dhcp_wait_timeout:
return True
pxe_mac = utils.get_agent_params().get('BOOTIF')
if pxe_mac:
pxe_mac = _normalize_mac(pxe_mac)
elif not CONF.inspection_dhcp_all_interfaces:
LOG.warning('No PXE boot interface known - not waiting for it '
'to get the IP address')
return False
threshold = time.time() + CONF.inspection_dhcp_wait_timeout
while time.time() <= threshold:
interfaces = hardware.dispatch_to_managers('list_network_interfaces')
interfaces = [iface for iface in interfaces
if CONF.inspection_dhcp_all_interfaces
or iface.mac_address.lower() == pxe_mac]
missing = [iface.name for iface in interfaces
if iface.has_carrier and not iface.ipv4_address]
if not iface.ipv4_address]
if not missing:
return True

View File

@ -461,6 +461,7 @@ class TestCollectExtraHardware(unittest.TestCase):
mock_execute.assert_called_once_with('hardware-detect')
@mock.patch.object(utils, 'get_agent_params', lambda: {'BOOTIF': '01-cdef'})
@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True)
class TestWaitForDhcp(unittest.TestCase):
def setUp(self):
@ -469,20 +470,29 @@ class TestWaitForDhcp(unittest.TestCase):
inspector.DEFAULT_DHCP_WAIT_TIMEOUT)
@mock.patch.object(time, 'sleep', autospec=True)
def test_ok(self, mocked_sleep, mocked_dispatch):
def test_all(self, mocked_sleep, mocked_dispatch):
CONF.set_override('inspection_dhcp_all_interfaces', True)
# We used to rely on has_carrier check, but we've found it unreliable
# in the DIB image, so we ignore its value.
mocked_dispatch.side_effect = [
[hardware.NetworkInterface(name='em0', mac_addr='abcd',
ipv4_address=None),
hardware.NetworkInterface(name='em1', mac_addr='abcd',
ipv4_address='1.2.3.4')],
ipv4_address=None,
has_carrier=False),
hardware.NetworkInterface(name='em1', mac_addr='cdef',
ipv4_address='1.2.3.4',
has_carrier=False)],
[hardware.NetworkInterface(name='em0', mac_addr='abcd',
ipv4_address=None),
hardware.NetworkInterface(name='em1', mac_addr='abcd',
ipv4_address='1.2.3.4')],
ipv4_address=None,
has_carrier=True),
hardware.NetworkInterface(name='em1', mac_addr='cdef',
ipv4_address='1.2.3.4',
has_carrier=True)],
[hardware.NetworkInterface(name='em0', mac_addr='abcd',
ipv4_address='1.1.1.1'),
hardware.NetworkInterface(name='em1', mac_addr='abcd',
ipv4_address='1.2.3.4')],
ipv4_address='1.1.1.1',
has_carrier=True),
hardware.NetworkInterface(name='em1', mac_addr='cdef',
ipv4_address='1.2.3.4',
has_carrier=True)],
]
self.assertTrue(inspector.wait_for_dhcp())
@ -491,8 +501,33 @@ class TestWaitForDhcp(unittest.TestCase):
self.assertEqual(2, mocked_sleep.call_count)
self.assertEqual(3, mocked_dispatch.call_count)
@mock.patch.object(time, 'sleep', autospec=True)
def test_boot_only(self, mocked_sleep, mocked_dispatch):
CONF.set_override('inspection_dhcp_all_interfaces', False)
mocked_dispatch.side_effect = [
[hardware.NetworkInterface(name='em0', mac_addr='abcd',
ipv4_address=None,
has_carrier=False),
hardware.NetworkInterface(name='em1', mac_addr='cdef',
ipv4_address=None,
has_carrier=False)],
[hardware.NetworkInterface(name='em0', mac_addr='abcd',
ipv4_address=None,
has_carrier=True),
hardware.NetworkInterface(name='em1', mac_addr='cdef',
ipv4_address='1.2.3.4',
has_carrier=True)],
]
self.assertTrue(inspector.wait_for_dhcp())
mocked_dispatch.assert_called_with('list_network_interfaces')
self.assertEqual(1, mocked_sleep.call_count)
self.assertEqual(2, mocked_dispatch.call_count)
@mock.patch.object(inspector, '_DHCP_RETRY_INTERVAL', 0.01)
def test_timeout(self, mocked_dispatch):
CONF.set_override('inspection_dhcp_all_interfaces', True)
CONF.set_override('inspection_dhcp_wait_timeout', 0.02)
mocked_dispatch.return_value = [
@ -512,3 +547,13 @@ class TestWaitForDhcp(unittest.TestCase):
self.assertTrue(inspector.wait_for_dhcp())
self.assertFalse(mocked_dispatch.called)
class TestNormalizeMac(unittest.TestCase):
def test_correct_mac(self):
self.assertEqual('11:22:33:aa:bb:cc',
inspector._normalize_mac('11:22:33:aa:BB:cc'))
def test_pxelinux_mac(self):
self.assertEqual('11:22:33:aa:bb:cc',
inspector._normalize_mac('01-11-22-33-aa-BB-cc'))

View File

@ -0,0 +1,8 @@
---
fixes:
- During inspection wait only for a PXE booting NIC to get its IP by default.
Introduce a new "inspection_dhcp_all_interfaces" option to enable waiting
for all interfaces instead.
- Stop checking the "has_carrier" field when waiting for NIC's to get IP
addresses, as it might be set to "False" when the interface is being
configured.