From ec55126f6bcc3448acd2259ccfcd8de5a9d2b4e2 Mon Sep 17 00:00:00 2001 From: Tobias Henkel Date: Tue, 15 Sep 2020 13:47:05 +0200 Subject: [PATCH] Add zookeeper-timeout connection config The default zookeeper session timout is 10 seconds which is not enough on a highly loaded nodepool. Like in zuul make this configurable so we can avoid session losses. Change-Id: Id7087141174c84c6cdcbb3933c233f5fa0e7d569 --- doc/source/configuration.rst | 5 +++++ nodepool/builder.py | 4 +++- nodepool/cmd/config_validator.py | 1 + nodepool/config.py | 6 ++++++ nodepool/launcher.py | 4 +++- nodepool/tests/fixtures/node.yaml | 2 ++ nodepool/zk.py | 9 ++++++--- releasenotes/notes/zk-timeout-bede88920e248c96.yaml | 5 +++++ 8 files changed, 31 insertions(+), 5 deletions(-) create mode 100644 releasenotes/notes/zk-timeout-bede88920e248c96.yaml diff --git a/doc/source/configuration.rst b/doc/source/configuration.rst index fbdb76f9a..94b3d8bd7 100644 --- a/doc/source/configuration.rst +++ b/doc/source/configuration.rst @@ -160,6 +160,11 @@ Options The path to the PEM encoded CA certificate. +.. attr:: zookeeper-timeout + :type: float + :default: 10.0 + + The ZooKeeper session timeout, in seconds. .. attr:: labels :type: list diff --git a/nodepool/builder.py b/nodepool/builder.py index d1a48f736..40f48025f 100644 --- a/nodepool/builder.py +++ b/nodepool/builder.py @@ -1407,7 +1407,9 @@ class NodePoolBuilder(object): list(self._config.zookeeper_servers.values()), tls_cert=self._config.zookeeper_tls_cert, tls_key=self._config.zookeeper_tls_key, - tls_ca=self._config.zookeeper_tls_ca) + tls_ca=self._config.zookeeper_tls_ca, + timeout=self._config.zookeeper_timeout, + ) self.log.debug('Starting listener for build jobs') diff --git a/nodepool/cmd/config_validator.py b/nodepool/cmd/config_validator.py index f6a1da4fd..4c464a1e8 100644 --- a/nodepool/cmd/config_validator.py +++ b/nodepool/cmd/config_validator.py @@ -74,6 +74,7 @@ class ConfigValidator: 'chroot': str, }], 'zookeeper-tls': zk_tls, + 'zookeeper-timeout': float, 'providers': list, 'labels': [label], 'diskimages': [diskimage], diff --git a/nodepool/config.py b/nodepool/config.py index 8e5a7c21c..f10e29a6c 100644 --- a/nodepool/config.py +++ b/nodepool/config.py @@ -38,6 +38,7 @@ class Config(ConfigValue): self.providers = {} self.provider_managers = {} self.zookeeper_servers = {} + self.zookeeper_timeout = 10.0 self.zookeeper_tls_cert = None self.zookeeper_tls_key = None self.zookeeper_tls_ca = None @@ -55,6 +56,7 @@ class Config(ConfigValue): self.providers == other.providers and self.provider_managers == other.provider_managers and self.zookeeper_servers == other.zookeeper_servers and + self.zookeeper_timeout == other.zookeeper_timeout and self.elements_dir == other.elements_dir and self.images_dir == other.images_dir and self.build_log_dir == other.build_log_dir and @@ -106,6 +108,9 @@ class Config(ConfigValue): name = z.host + '_' + str(z.port) self.zookeeper_servers[name] = z + def setZooKeeperTimeout(self, timeout): + self.zookeeper_timeout = float(timeout) + def setDiskImages(self, diskimages_cfg): if not diskimages_cfg: return @@ -346,6 +351,7 @@ def loadConfig(config_path, env=os.environ): newconfig.setMaxHoldAge(config.get('max-hold-age')) newconfig.setWebApp(config.get('webapp')) newconfig.setZooKeeperServers(config.get('zookeeper-servers')) + newconfig.setZooKeeperTimeout(config.get('zookeeper-timeout', 10.0)) newconfig.setDiskImages(config.get('diskimages')) newconfig.setLabels(config.get('labels')) newconfig.setProviders(config.get('providers')) diff --git a/nodepool/launcher.py b/nodepool/launcher.py index 9147ad606..d839f7092 100644 --- a/nodepool/launcher.py +++ b/nodepool/launcher.py @@ -947,7 +947,9 @@ class NodePool(threading.Thread): self.zk.connect(configured, tls_cert=config.zookeeper_tls_cert, tls_key=config.zookeeper_tls_key, - tls_ca=config.zookeeper_tls_ca) + tls_ca=config.zookeeper_tls_ca, + timeout=config.zookeeper_timeout, + ) else: self.log.debug("Detected ZooKeeper server changes") self.zk.resetHosts(configured) diff --git a/nodepool/tests/fixtures/node.yaml b/nodepool/tests/fixtures/node.yaml index 24bf3b998..4a379d762 100644 --- a/nodepool/tests/fixtures/node.yaml +++ b/nodepool/tests/fixtures/node.yaml @@ -13,6 +13,8 @@ zookeeper-tls: cert: {zookeeper_cert} key: {zookeeper_key} +zookeeper-timeout: 20.0 + labels: - name: fake-label min-ready: 1 diff --git a/nodepool/zk.py b/nodepool/zk.py index 773f6d27a..6f6edf993 100644 --- a/nodepool/zk.py +++ b/nodepool/zk.py @@ -974,7 +974,7 @@ class ZooKeeper(object): self._became_lost = False def connect(self, host_list, read_only=False, tls_cert=None, - tls_key=None, tls_ca=None): + tls_key=None, tls_ca=None, timeout=10.0): ''' Establish a connection with ZooKeeper cluster. @@ -992,8 +992,11 @@ class ZooKeeper(object): ''' if self.client is None: hosts = buildZooKeeperHosts(host_list) - args = dict(hosts=hosts, - read_only=read_only) + args = dict( + hosts=hosts, + read_only=read_only, + timeout=timeout, + ) args['use_ssl'] = True if not (tls_key and tls_cert and tls_ca): diff --git a/releasenotes/notes/zk-timeout-bede88920e248c96.yaml b/releasenotes/notes/zk-timeout-bede88920e248c96.yaml new file mode 100644 index 000000000..cb8062802 --- /dev/null +++ b/releasenotes/notes/zk-timeout-bede88920e248c96.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + It is now possible to configure the :attr:`zookeeper-timeout`. which can + help to avoid zookeeper session losses on busy systems.