From c09753bb60be1819ef102ba8a86b534bba146ccb Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Thu, 19 May 2016 17:21:28 +0200 Subject: [PATCH] Add a configurable sleep before IPA starts working Some kernel modules take substantial time to initialize. For example, with the mpt2sas RAID driver, inspection and deployment randomly fail due to IPA starting before the driver finishes initialization. As much as I hate it, the only way to guarantee that the hardware is truly initialized is to wait for it. Apparently all hardware in Linux is treated as hotplugged, so there is no such thing as "hardware initialization is finished". Operators can add a sleep based on their knowledge of their hardware. The default behaviour remains the same. Change-Id: I0446ae81d760dacaf31eea6ad9f9eaa098cf5e93 Partial-Bug: #1582797 --- ironic_python_agent/agent.py | 10 +++++- ironic_python_agent/cmd/agent.py | 8 ++++- ironic_python_agent/tests/unit/test_agent.py | 31 +++++++++++++++++++ .../notes/sleep-ebe58fbe07d30219.yaml | 4 +++ 4 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/sleep-ebe58fbe07d30219.yaml diff --git a/ironic_python_agent/agent.py b/ironic_python_agent/agent.py index 75a7e8aab..a0f183fde 100644 --- a/ironic_python_agent/agent.py +++ b/ironic_python_agent/agent.py @@ -147,7 +147,8 @@ class IronicPythonAgent(base.ExecuteCommandMixin): def __init__(self, api_url, advertise_address, listen_address, ip_lookup_attempts, ip_lookup_sleep, network_interface, - lookup_timeout, lookup_interval, driver_name, standalone): + lookup_timeout, lookup_interval, driver_name, standalone, + hardware_initialization_delay=0): super(IronicPythonAgent, self).__init__() self.ext_mgr = extension.ExtensionManager( namespace='ironic_python_agent.extensions', @@ -175,6 +176,7 @@ class IronicPythonAgent(base.ExecuteCommandMixin): self.ip_lookup_sleep = ip_lookup_sleep self.network_interface = network_interface self.standalone = standalone + 
self.hardware_initialization_delay = hardware_initialization_delay def get_status(self): """Retrieve a serializable status. @@ -289,6 +291,12 @@ class IronicPythonAgent(base.ExecuteCommandMixin): # Cached hw managers at runtime, not load time. See bug 1490008. hardware.load_managers() + # Operator-settable delay before hardware actually comes up. + # Helps with slow RAID drivers - see bug 1582797. + if self.hardware_initialization_delay > 0: + LOG.info('Waiting %d seconds before proceeding', + self.hardware_initialization_delay) + time.sleep(self.hardware_initialization_delay) if not self.standalone: # Inspection should be started before call to lookup, otherwise diff --git a/ironic_python_agent/cmd/agent.py b/ironic_python_agent/cmd/agent.py index 8cec49990..705bf5873 100644 --- a/ironic_python_agent/cmd/agent.py +++ b/ironic_python_agent/cmd/agent.py @@ -129,6 +129,11 @@ cli_opts = [ help='Whether to wait for all interfaces to get their IP ' 'addresses before inspection. If set to false ' '(the default), only waits for the PXE interface.'), + + cfg.IntOpt('hardware_initialization_delay', + default=APARAMS.get('ipa-hardware-initialization-delay', 0), + help='How much time (in seconds) to wait for hardware to ' + 'initialize before proceeding with any actions.'), ] CONF.register_cli_opts(cli_opts) @@ -153,4 +158,5 @@ def run(): CONF.lookup_timeout, CONF.lookup_interval, CONF.driver_name, - CONF.standalone).run() + CONF.standalone, + CONF.hardware_initialization_delay).run() diff --git a/ironic_python_agent/tests/unit/test_agent.py b/ironic_python_agent/tests/unit/test_agent.py index b51e68518..cd3009ff7 100644 --- a/ironic_python_agent/tests/unit/test_agent.py +++ b/ironic_python_agent/tests/unit/test_agent.py @@ -232,6 +232,37 @@ class TestBaseAgent(test_base.BaseTestCase): self.agent.heartbeater.start.assert_called_once_with() + @mock.patch.object(time, 'sleep', autospec=True) + @mock.patch('wsgiref.simple_server.make_server', autospec=True) + 
@mock.patch.object(hardware.HardwareManager, 'list_hardware_info') + def test_run_with_sleep(self, mocked_list_hardware, wsgi_server_cls, + mocked_sleep): + CONF.set_override('inspection_callback_url', '', enforce_type=True) + wsgi_server = wsgi_server_cls.return_value + wsgi_server.start.side_effect = KeyboardInterrupt() + + self.agent.hardware_initialization_delay = 10 + self.agent.heartbeater = mock.Mock() + self.agent.api_client.lookup_node = mock.Mock() + self.agent.api_client.lookup_node.return_value = { + 'node': { + 'uuid': 'deadbeef-dabb-ad00-b105-f00d00bab10c' + }, + 'heartbeat_timeout': 300 + } + self.agent.run() + + listen_addr = ('192.0.2.1', 9999) + wsgi_server_cls.assert_called_once_with( + listen_addr[0], + listen_addr[1], + self.agent.api, + server_class=simple_server.WSGIServer) + wsgi_server.serve_forever.assert_called_once_with() + + self.agent.heartbeater.start.assert_called_once_with() + mocked_sleep.assert_called_once_with(10) + def test_async_command_success(self): result = base.AsyncCommandResult('foo_command', {'fail': False}, foo_execute) diff --git a/releasenotes/notes/sleep-ebe58fbe07d30219.yaml b/releasenotes/notes/sleep-ebe58fbe07d30219.yaml new file mode 100644 index 000000000..163034af3 --- /dev/null +++ b/releasenotes/notes/sleep-ebe58fbe07d30219.yaml @@ -0,0 +1,4 @@ +--- +features: + - Add a configurable (and disabled by default) sleep before IPA does any + actions.