diff --git a/ironic/cmd/conductor.py b/ironic/cmd/conductor.py index 5fa4c84899..a932cb22fb 100644 --- a/ironic/cmd/conductor.py +++ b/ironic/cmd/conductor.py @@ -67,6 +67,12 @@ def main(): issue_startup_warnings(CONF) launcher = service.launch(CONF, mgr, restart_method='mutate') + + # NOTE(dtantsur): handling start-up failures before launcher.wait() helps + # notify systemd about them. Otherwise the launcher will report successful + # service start-up before checking the threads. + mgr.wait_for_start() + sys.exit(launcher.wait()) diff --git a/ironic/cmd/singleprocess.py b/ironic/cmd/singleprocess.py index 20a348ae5c..675bd1bc29 100644 --- a/ironic/cmd/singleprocess.py +++ b/ironic/cmd/singleprocess.py @@ -49,4 +49,9 @@ def main(): wsgi = wsgi_service.WSGIService('ironic_api', CONF.api.enable_ssl_api) launcher.launch_service(wsgi) + # NOTE(dtantsur): handling start-up failures before launcher.wait() helps + # notify systemd about them. Otherwise the launcher will report successful + # service start-up before checking the threads. + mgr.wait_for_start() + sys.exit(launcher.wait()) diff --git a/ironic/common/rpc_service.py b/ironic/common/rpc_service.py index 78379c9817..b0eec7758b 100644 --- a/ironic/common/rpc_service.py +++ b/ironic/common/rpc_service.py @@ -15,6 +15,8 @@ # under the License. import signal +import sys +import time from ironic_lib.json_rpc import server as json_rpc from oslo_config import cfg @@ -42,9 +44,29 @@ class RPCService(service.Service): self.topic = self.manager.topic self.rpcserver = None self.deregister = True + self._failure = None + self._started = False + + def wait_for_start(self): + while not self._started and not self._failure: + time.sleep(0.1) + if self._failure: + LOG.critical(self._failure) + sys.exit(self._failure) def start(self): + self._failure = None + self._started = False super(RPCService, self).start() + try: + self._real_start() + except Exception as exc: + self._failure = f"{exc.__class__.__name__}: {exc}" + raise + else: + self._started = True + + def _real_start(self): admin_context = context.get_admin_context() serializer = objects_base.IronicObjectSerializer(is_server=True) diff --git a/ironic/tests/unit/common/test_rpc_service.py b/ironic/tests/unit/common/test_rpc_service.py index 4e190f5e6f..8483bfb224 100644 --- a/ironic/tests/unit/common/test_rpc_service.py +++ b/ironic/tests/unit/common/test_rpc_service.py @@ -46,6 +46,8 @@ class TestRPCService(base.TestCase): mock_rpc, mock_ios, mock_target, mock_prepare_method): mock_rpc.return_value.start = mock.MagicMock() self.rpc_svc.handle_signal = mock.MagicMock() + self.assertFalse(self.rpc_svc._started) + self.assertFalse(self.rpc_svc._failure) self.rpc_svc.start() mock_ctx.assert_called_once_with() mock_target.assert_called_once_with(topic=self.rpc_svc.topic, @@ -55,6 +57,9 @@ class TestRPCService(base.TestCase): mock_init_method.assert_called_once_with(self.rpc_svc.manager, mock_ctx.return_value) self.assertIs(rpc.GLOBAL_MANAGER, self.rpc_svc.manager) + self.assertTrue(self.rpc_svc._started) + self.assertFalse(self.rpc_svc._failure) + self.rpc_svc.wait_for_start() # should be no-op @mock.patch.object(manager.ConductorManager, 'prepare_host', autospec=True) @mock.patch.object(oslo_messaging, 'Target', autospec=True) @@ -77,3 +82,29 @@ class TestRPCService(base.TestCase): mock_init_method.assert_called_once_with(self.rpc_svc.manager, mock_ctx.return_value) self.assertIs(rpc.GLOBAL_MANAGER, self.rpc_svc.manager) + + @mock.patch.object(manager.ConductorManager, 'prepare_host', autospec=True) + @mock.patch.object(oslo_messaging, 'Target', autospec=True) + @mock.patch.object(objects_base, 'IronicObjectSerializer', autospec=True) + @mock.patch.object(rpc, 'get_server', autospec=True) + @mock.patch.object(manager.ConductorManager, 'init_host', autospec=True) + @mock.patch.object(context, 'get_admin_context', autospec=True) + def test_start_failure(self, mock_ctx, mock_init_method, mock_rpc, + mock_ios, mock_target, mock_prepare_method): + mock_rpc.return_value.start = mock.MagicMock() + self.rpc_svc.handle_signal = mock.MagicMock() + mock_init_method.side_effect = RuntimeError("boom") + self.assertFalse(self.rpc_svc._started) + self.assertFalse(self.rpc_svc._failure) + self.assertRaises(RuntimeError, self.rpc_svc.start) + mock_ctx.assert_called_once_with() + mock_target.assert_called_once_with(topic=self.rpc_svc.topic, + server="fake_host") + mock_ios.assert_called_once_with(is_server=True) + mock_prepare_method.assert_called_once_with(self.rpc_svc.manager) + mock_init_method.assert_called_once_with(self.rpc_svc.manager, + mock_ctx.return_value) + self.assertIsNone(rpc.GLOBAL_MANAGER) + self.assertFalse(self.rpc_svc._started) + self.assertIn("boom", self.rpc_svc._failure) + self.assertRaises(SystemExit, self.rpc_svc.wait_for_start) diff --git a/releasenotes/notes/service-wait-e85cbe7978f61764.yaml b/releasenotes/notes/service-wait-e85cbe7978f61764.yaml new file mode 100644 index 0000000000..c2fff601d1 --- /dev/null +++ b/releasenotes/notes/service-wait-e85cbe7978f61764.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + The ``ironic`` and ``ironic-conductor`` services now wait for the conductor + manager to start before notifying systemd about the successful start-up.