Add hardware manager interface for hardware initialization

Some kernel modules take substantial time to initialize. For example,
with mpt2sas RAID driver inspection and deployment randomly fail
due to IPA starting before the driver finishes initialization.

Add a new hardware manager method initialize_hardware, which gets
run on start up before other hardware manager method invocations.

The generic implementation is to call udev settle and wait for
at least one suitable disk device to appear with the hardcoded
timeout of 15 seconds. Also preload the IPMI modules instead of
calling modprobe every time the inventory is requested.

Change-Id: If7758bb6e3faac7d05451baa3a26adb8ab9953d5
Partial-Bug: #1582797
This commit is contained in:
Dmitry Tantsur 2016-05-18 12:28:56 +02:00
parent 89f4f33128
commit 0962cae1da
5 changed files with 125 additions and 8 deletions

@ -289,6 +289,8 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
# Cached hw managers at runtime, not load time. See bug 1490008.
hardware.load_managers()
# Request the hw manager to do long initializations
hardware.dispatch_to_managers('initialize_hardware')
if not self.standalone:
# Inspection should be started before call to lookup, otherwise

@ -16,6 +16,7 @@ import abc
import functools
import os
import shlex
import time
import netifaces
from oslo_concurrency import processutils
@ -38,6 +39,9 @@ UNIT_CONVERTER = pint.UnitRegistry(filename=None)
UNIT_CONVERTER.define('MB = []')
UNIT_CONVERTER.define('GB = 1024 MB')
_DISK_WAIT_ATTEMPTS = 5
_DISK_WAIT_DELAY = 3
def _get_device_vendor(dev):
"""Get the vendor name of a given device."""
@ -385,17 +389,57 @@ class HardwareManager(object):
'version': getattr(self, 'HARDWARE_MANAGER_VERSION', '1.0')
}
def initialize_hardware(self):
"""Initialize hardware on the agent start up.
This method will be called once on start up before any calls
to list_hardware_info are made.
The default implementation does nothing.
"""
class GenericHardwareManager(HardwareManager):
HARDWARE_MANAGER_NAME = 'generic_hardware_manager'
HARDWARE_MANAGER_VERSION = '1.0'
# These modules are rarely loaded automatically
_PRELOADED_MODULES = ['ipmi_msghandler', 'ipmi_devintf', 'ipmi_si']
def __init__(self):
self.sys_path = '/sys'
def evaluate_hardware_support(self):
return HardwareSupport.GENERIC
def initialize_hardware(self):
LOG.debug('Initializing hardware')
self._preload_modules()
_udev_settle()
self._wait_for_disks()
def _preload_modules(self):
# TODO(dtantsur): try to load as many kernel modules for present
# hardware as it's possible.
for mod in self._PRELOADED_MODULES:
utils.try_execute('modprobe', mod)
def _wait_for_disks(self):
# Wait for at least one suitable disk to show up, otherwise neither
# inspection not deployment have any chances to succeed.
for attempt in range(_DISK_WAIT_ATTEMPTS):
try:
self.get_os_install_device()
except errors.DeviceNotFound:
LOG.debug('Still waiting for at least one disk to appear, '
'attempt %d of %d', attempt + 1, _DISK_WAIT_ATTEMPTS)
time.sleep(_DISK_WAIT_DELAY)
else:
break
else:
LOG.warning('No disks detected in %d seconds',
_DISK_WAIT_DELAY * _DISK_WAIT_ATTEMPTS)
def _get_interface_info(self, interface_name):
addr_path = '{0}/class/net/{1}/address'.format(self.sys_path,
interface_name)
@ -738,11 +782,6 @@ class GenericHardwareManager(HardwareManager):
return True
def get_bmc_address(self):
# These modules are rarely loaded automatically
utils.try_execute('modprobe', 'ipmi_msghandler')
utils.try_execute('modprobe', 'ipmi_devintf')
utils.try_execute('modprobe', 'ipmi_si')
try:
out, _e = utils.execute(
"ipmitool lan print | grep -e 'IP Address [^S]' "

@ -167,9 +167,12 @@ class TestBaseAgent(test_base.BaseTestCase):
self.assertEqual(pkg_resources.get_distribution('ironic-python-agent')
.version, status.version)
@mock.patch.object(hardware.GenericHardwareManager, 'initialize_hardware',
autospec=True)
@mock.patch('wsgiref.simple_server.make_server', autospec=True)
@mock.patch.object(hardware.HardwareManager, 'list_hardware_info')
def test_run(self, mocked_list_hardware, wsgi_server_cls):
def test_run(self, mocked_list_hardware, wsgi_server_cls,
mocked_init_hardware):
CONF.set_override('inspection_callback_url', '', enforce_type=True)
wsgi_server = wsgi_server_cls.return_value
wsgi_server.start.side_effect = KeyboardInterrupt()
@ -193,12 +196,15 @@ class TestBaseAgent(test_base.BaseTestCase):
wsgi_server.serve_forever.assert_called_once_with()
self.agent.heartbeater.start.assert_called_once_with()
mocked_init_hardware.assert_called_once_with(mock.ANY)
@mock.patch.object(inspector, 'inspect', autospec=True)
@mock.patch.object(hardware.GenericHardwareManager, 'initialize_hardware',
autospec=True)
@mock.patch('wsgiref.simple_server.make_server', autospec=True)
@mock.patch.object(hardware.HardwareManager, 'list_hardware_info')
def test_run_with_inspection(self, mocked_list_hardware, wsgi_server_cls,
mocked_inspector):
mocked_init_hardware, mocked_inspector):
CONF.set_override('inspection_callback_url', 'http://foo/bar',
enforce_type=True)
@ -231,6 +237,7 @@ class TestBaseAgent(test_base.BaseTestCase):
self.agent.api_client.lookup_node.call_args[1]['node_uuid'])
self.agent.heartbeater.start.assert_called_once_with()
mocked_init_hardware.assert_called_once_with(mock.ANY)
def test_async_command_success(self):
result = base.AsyncCommandResult('foo_command', {'fail': False},

@ -12,9 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import mock
import netifaces
import os
from oslo_concurrency import processutils
from oslo_utils import units
from oslotest import base as test_base
@ -1085,6 +1087,66 @@ class TestGenericHardwareManager(test_base.BaseTestCase):
self.hardware.get_system_vendor_info().manufacturer)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(hardware.GenericHardwareManager,
'get_os_install_device', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
class TestGenericHardwareManagerInitializeHardware(test_base.BaseTestCase):
def setUp(self):
super(TestGenericHardwareManagerInitializeHardware, self).setUp()
self.hardware = hardware.GenericHardwareManager()
def test_ok(self, mocked_execute, mocked_os_dev, mocked_sleep):
self.hardware.initialize_hardware()
expected_execute_calls = [
mock.call('modprobe', mod)
for mod in self.hardware._PRELOADED_MODULES
]
expected_execute_calls.append(mock.call('udevadm', 'settle'))
self.assertEqual(expected_execute_calls, mocked_execute.call_args_list)
mocked_os_dev.assert_called_once_with(mock.ANY)
self.assertFalse(mocked_sleep.called)
def test_disk_delayed(self, mocked_execute, mocked_os_dev, mocked_sleep):
mocked_os_dev.side_effect = [
errors.DeviceNotFound(''),
errors.DeviceNotFound(''),
None
]
self.hardware.initialize_hardware()
expected_execute_calls = [
mock.call('modprobe', mod)
for mod in self.hardware._PRELOADED_MODULES
]
expected_execute_calls.append(mock.call('udevadm', 'settle'))
self.assertEqual(expected_execute_calls, mocked_execute.call_args_list)
mocked_os_dev.assert_called_with(mock.ANY)
self.assertEqual(3, mocked_os_dev.call_count)
self.assertEqual(2, mocked_sleep.call_count)
@mock.patch.object(hardware.LOG, 'warning', autospec=True)
def test_no_disk(self, mocked_warn, mocked_execute, mocked_os_dev,
mocked_sleep):
mocked_os_dev.side_effect = errors.DeviceNotFound('')
self.hardware.initialize_hardware()
expected_execute_calls = [
mock.call('modprobe', mod)
for mod in self.hardware._PRELOADED_MODULES
]
expected_execute_calls.append(mock.call('udevadm', 'settle'))
self.assertEqual(expected_execute_calls, mocked_execute.call_args_list)
mocked_os_dev.assert_called_with(mock.ANY)
self.assertEqual(hardware._DISK_WAIT_ATTEMPTS,
mocked_os_dev.call_count)
self.assertEqual(hardware._DISK_WAIT_ATTEMPTS, mocked_sleep.call_count)
self.assertTrue(mocked_warn.called)
@mock.patch.object(utils, 'execute', autospec=True)
class TestModuleFunctions(test_base.BaseTestCase):

@ -0,0 +1,7 @@
---
features:
- Add a new hardware manager method "initialize_hardware" which can be
overriden to provide early oneshot hardware initialization, such as loading
kernel modules or probing hardware. The generic implementation loads the
IPMI modules, calls "udev settle" and waits for at least one suitable disk
to appear in the "lsblk" output.