Browse Source

Merge "zk: retry initial zookeeper connection attempts"

Zuul 9 months ago
parent
commit
fe80343d90
1 changed file with 21 additions and 1 deletion
  1. 21
    1
      nodepool/zk.py

+ 21
- 1
nodepool/zk.py View File

@@ -20,6 +20,7 @@ import logging
20 20
 import time
21 21
 from kazoo.client import KazooClient, KazooState
22 22
 from kazoo import exceptions as kze
23
+from kazoo.handlers.threading import KazooTimeoutError
23 24
 from kazoo.recipe.lock import Lock
24 25
 
25 26
 from nodepool import exceptions as npe
@@ -655,12 +656,16 @@ class ZooKeeper(object):
655 656
     REQUEST_ROOT = "/nodepool/requests"
656 657
     REQUEST_LOCK_ROOT = "/nodepool/requests-lock"
657 658
 
659
+    # Log zookeeper connection retries at most once every 10 seconds
660
+    retry_log_rate = 10
661
+
658 662
     def __init__(self):
659 663
         '''
660 664
         Initialize the ZooKeeper object.
661 665
         '''
662 666
         self.client = None
663 667
         self._became_lost = False
668
+        self._last_retry_log = 0
664 669
 
665 670
     # =======================================================================
666 671
     # Private Methods
@@ -788,6 +793,15 @@ class ZooKeeper(object):
788 793
         else:
789 794
             self.log.debug("ZooKeeper connection: CONNECTED")
790 795
 
796
+    def logConnectionRetryEvent(self):
797
+        '''
798
+        Kazoo retry callback
799
+        '''
800
+        now = time.monotonic()
801
+        if now - self._last_retry_log >= self.retry_log_rate:
802
+            self.log.warning("Retrying zookeeper connection")
803
+            self._last_retry_log = now
804
+
791 805
     # =======================================================================
792 806
     # Public Methods and Properties
793 807
     # =======================================================================
@@ -834,7 +848,13 @@ class ZooKeeper(object):
834 848
             hosts = buildZooKeeperHosts(host_list)
835 849
             self.client = KazooClient(hosts=hosts, read_only=read_only)
836 850
             self.client.add_listener(self._connection_listener)
837
-            self.client.start()
851
+            # Manually retry initial connection attempt
852
+            while True:
853
+                try:
854
+                    self.client.start(1)
855
+                    break
856
+                except KazooTimeoutError:
857
+                    self.logConnectionRetryEvent()
838 858
 
839 859
     def disconnect(self):
840 860
         '''

Loading…
Cancel
Save