Add a configurable sleep before IPA starts working

Some kernel modules take substantial time to initialize. For example,
with the mpt2sas RAID driver, inspection and deployment randomly fail
because IPA starts before the driver finishes initialization.

As much as I hate it, the only way to guarantee that the hardware is
truly initialized is to wait for it. Apparently all hardware in Linux
is treated as hotplugged, so there is no such thing as "hardware
initialization is finished". Operators can add a sleep based on their
knowledge of their hardware.

The default behaviour remains the same.

Change-Id: I0446ae81d760dacaf31eea6ad9f9eaa098cf5e93
Partial-Bug: #1582797
This commit is contained in:
Dmitry Tantsur
2016-05-19 17:21:28 +02:00
parent 015fad6054
commit c09753bb60
4 changed files with 51 additions and 2 deletions

View File

@@ -147,7 +147,8 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
def __init__(self, api_url, advertise_address, listen_address, def __init__(self, api_url, advertise_address, listen_address,
ip_lookup_attempts, ip_lookup_sleep, network_interface, ip_lookup_attempts, ip_lookup_sleep, network_interface,
lookup_timeout, lookup_interval, driver_name, standalone): lookup_timeout, lookup_interval, driver_name, standalone,
hardware_initialization_delay=0):
super(IronicPythonAgent, self).__init__() super(IronicPythonAgent, self).__init__()
self.ext_mgr = extension.ExtensionManager( self.ext_mgr = extension.ExtensionManager(
namespace='ironic_python_agent.extensions', namespace='ironic_python_agent.extensions',
@@ -175,6 +176,7 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
self.ip_lookup_sleep = ip_lookup_sleep self.ip_lookup_sleep = ip_lookup_sleep
self.network_interface = network_interface self.network_interface = network_interface
self.standalone = standalone self.standalone = standalone
self.hardware_initialization_delay = hardware_initialization_delay
def get_status(self): def get_status(self):
"""Retrieve a serializable status. """Retrieve a serializable status.
@@ -289,6 +291,12 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
# Cached hw managers at runtime, not load time. See bug 1490008. # Cached hw managers at runtime, not load time. See bug 1490008.
hardware.load_managers() hardware.load_managers()
# Operator-settable delay before hardware actually comes up.
# Helps with slow RAID drivers - see bug 1582797.
if self.hardware_initialization_delay > 0:
LOG.info('Waiting %d seconds before proceeding',
self.hardware_initialization_delay)
time.sleep(self.hardware_initialization_delay)
if not self.standalone: if not self.standalone:
# Inspection should be started before call to lookup, otherwise # Inspection should be started before call to lookup, otherwise

View File

@@ -129,6 +129,11 @@ cli_opts = [
help='Whether to wait for all interfaces to get their IP ' help='Whether to wait for all interfaces to get their IP '
'addresses before inspection. If set to false ' 'addresses before inspection. If set to false '
'(the default), only waits for the PXE interface.'), '(the default), only waits for the PXE interface.'),
cfg.IntOpt('hardware_initialization_delay',
default=APARAMS.get('ipa-hardware-initialization-delay', 0),
help='How much time (in seconds) to wait for hardware to '
'initialize before proceeding with any actions.'),
] ]
CONF.register_cli_opts(cli_opts) CONF.register_cli_opts(cli_opts)
@@ -153,4 +158,5 @@ def run():
CONF.lookup_timeout, CONF.lookup_timeout,
CONF.lookup_interval, CONF.lookup_interval,
CONF.driver_name, CONF.driver_name,
CONF.standalone).run() CONF.standalone,
CONF.hardware_initialization_delay).run()

View File

@@ -232,6 +232,37 @@ class TestBaseAgent(test_base.BaseTestCase):
self.agent.heartbeater.start.assert_called_once_with() self.agent.heartbeater.start.assert_called_once_with()
# Verify that run() sleeps exactly once, for the configured number of
# seconds, when hardware_initialization_delay is set to a positive value.
# time.sleep is patched so the test never actually waits. mock.patch
# decorators inject their mocks bottom-up, which is why the argument order
# below is list_hardware_info, make_server, sleep.
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch('wsgiref.simple_server.make_server', autospec=True)
@mock.patch.object(hardware.HardwareManager, 'list_hardware_info')
def test_run_with_sleep(self, mocked_list_hardware, wsgi_server_cls,
mocked_sleep):
# Empty inspection callback URL - presumably keeps run() from attempting
# inspection; TODO confirm against the inspector module.
CONF.set_override('inspection_callback_url', '', enforce_type=True)
wsgi_server = wsgi_server_cls.return_value
# NOTE(review): the KeyboardInterrupt is attached to start(), while the
# assertion further down checks serve_forever(); confirm this is intended.
wsgi_server.start.side_effect = KeyboardInterrupt()
# The delay under test: a positive value should trigger one sleep of 10s.
self.agent.hardware_initialization_delay = 10
# Replace the heartbeater with a mock so its start() call can be asserted.
self.agent.heartbeater = mock.Mock()
# Stub the node lookup so run() gets a canned node and heartbeat timeout.
self.agent.api_client.lookup_node = mock.Mock()
self.agent.api_client.lookup_node.return_value = {
'node': {
'uuid': 'deadbeef-dabb-ad00-b105-f00d00bab10c'
},
'heartbeat_timeout': 300
}
self.agent.run()
# Presumably the listen address the test fixture configured the agent
# with - verify against setUp().
listen_addr = ('192.0.2.1', 9999)
wsgi_server_cls.assert_called_once_with(
listen_addr[0],
listen_addr[1],
self.agent.api,
server_class=simple_server.WSGIServer)
wsgi_server.serve_forever.assert_called_once_with()
self.agent.heartbeater.start.assert_called_once_with()
# The actual point of this test: the configured delay was honoured.
mocked_sleep.assert_called_once_with(10)
def test_async_command_success(self): def test_async_command_success(self):
result = base.AsyncCommandResult('foo_command', {'fail': False}, result = base.AsyncCommandResult('foo_command', {'fail': False},
foo_execute) foo_execute)

View File

@@ -0,0 +1,4 @@
---
features:
- Adds a configurable sleep (disabled by default) before IPA performs any
actions. It is controlled by the ``hardware_initialization_delay`` option.