Do not start kuryr-daemon when worker_num <= 1
We've discovered that running kuryr-daemon with [cni_daemon]worker_num=1 breaks pyroute2.IPDB's ability to correctly close threads, leading to a process leak. This commit makes sure kuryr-daemon will fail to start when worker_num <= 1. This required a few more changes in order to make sure that when any kuryr-daemon subservice dies, kuryr-daemon will shut down too. Change-Id: I41afc6fa67abfff62d2f0017db508051a1e7edf4
This commit is contained in:
parent
90d08658dc
commit
87981d0652
@ -195,6 +195,12 @@ class DaemonServer(object):
|
||||
LOG.exception('Cannot start server on %s.', server_pair)
|
||||
raise
|
||||
|
||||
if CONF.cni_daemon.worker_num <= 1:
|
||||
msg = ('[cni_daemon]worker_num needs to be set to a value higher '
|
||||
'than 1')
|
||||
LOG.critical(msg)
|
||||
raise exceptions.InvalidKuryrConfiguration(msg)
|
||||
|
||||
try:
|
||||
self._server = serving.make_server(
|
||||
address, port, self.application, threaded=False,
|
||||
@ -387,6 +393,7 @@ class CNIDaemonServiceManager(cotyledon.ServiceManager):
|
||||
# NOTE(mdulko): Default shutdown timeout is 60 seconds and K8s won't
|
||||
# wait more by default anyway.
|
||||
super(CNIDaemonServiceManager, self).__init__()
|
||||
self._server_service = None
|
||||
# TODO(dulek): Use cotyledon.oslo_config_glue to support conf reload.
|
||||
|
||||
# TODO(vikasc): Should be done using dynamically loadable OVO types
|
||||
@ -403,11 +410,18 @@ class CNIDaemonServiceManager(cotyledon.ServiceManager):
|
||||
healthy = multiprocessing.Value(c_bool, True)
|
||||
metrics = self.manager.Queue()
|
||||
self.add(CNIDaemonWatcherService, workers=1, args=(registry, healthy,))
|
||||
self.add(CNIDaemonServerService, workers=1, args=(registry, healthy,
|
||||
metrics,))
|
||||
self._server_service = self.add(CNIDaemonServerService, workers=1,
|
||||
args=(registry, healthy, metrics,))
|
||||
self.add(CNIDaemonHealthServerService, workers=1, args=(healthy,))
|
||||
self.add(CNIDaemonExporterService, workers=1, args=(metrics,))
|
||||
self.register_hooks(on_terminate=self.terminate)
|
||||
|
||||
def shutdown_hook(service_id, worker_id, exit_code):
|
||||
LOG.critical(f'Child Service {service_id} had exited with code '
|
||||
f'{exit_code}, stopping kuryr-daemon')
|
||||
self.shutdown()
|
||||
|
||||
self.register_hooks(on_terminate=self.terminate,
|
||||
on_dead_worker=shutdown_hook)
|
||||
|
||||
def run(self):
|
||||
# FIXME(darshna): Remove pyroute2 IPDB deprecation warning, remove
|
||||
@ -440,12 +454,13 @@ class CNIDaemonServiceManager(cotyledon.ServiceManager):
|
||||
|
||||
def terminate(self):
|
||||
self._terminate_called.set()
|
||||
LOG.info("Gracefully stopping DaemonServer service..")
|
||||
self.reconfigure(self._server_service, 0)
|
||||
for worker in self._running_services[self._server_service]:
|
||||
worker.terminate()
|
||||
for worker in self._running_services[self._server_service]:
|
||||
worker.join()
|
||||
if self._server_service:
|
||||
LOG.info("Gracefully stopping DaemonServer service..")
|
||||
self.reconfigure(self._server_service, 0)
|
||||
for worker in self._running_services[self._server_service]:
|
||||
worker.terminate()
|
||||
for worker in self._running_services[self._server_service]:
|
||||
worker.join()
|
||||
LOG.info("Stopping registry manager...")
|
||||
self.manager.shutdown()
|
||||
LOG.info("Continuing with shutdown")
|
||||
|
@ -24,6 +24,10 @@ class IntegrityError(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidKuryrConfiguration(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
class ResourceNotReady(Exception):
|
||||
def __init__(self, resource):
|
||||
msg = resource
|
||||
|
@ -11,7 +11,7 @@ click==6.7
|
||||
cliff==2.11.0
|
||||
cmd2==0.8.2
|
||||
contextlib2==0.5.5
|
||||
cotyledon==1.5.0
|
||||
cotyledon==1.7.3
|
||||
coverage==4.0
|
||||
ddt==1.0.1
|
||||
debtcollector==1.19.0
|
||||
|
@ -2,7 +2,7 @@
|
||||
# of appearance. Changing the order has an impact on the overall integration
|
||||
# process, which may cause wedges in the gate later.
|
||||
|
||||
cotyledon>=1.5.0 # Apache-2.0
|
||||
cotyledon>=1.7.3 # Apache-2.0
|
||||
Flask!=0.11,>=0.12.3 # BSD
|
||||
kuryr-lib>=0.5.0 # Apache-2.0
|
||||
pbr!=2.1.0,>=2.0.0 # Apache-2.0
|
||||
|
Loading…
x
Reference in New Issue
Block a user