Speed up node listing

This speeds up node listing in the CLI ("nodepool list") and the
webapp ("/node-list") significantly.

The main culprit is that filling in the "locked" field is expensive
because we attempt to lock each node.  To make that faster, we
now just query the lock contenders to determine whether it is locked
(if there are contenders, it's locked).

Further, in the webapp, we can use the cache more aggressively.  First,
we update the cache listener to watch lock contenders and cache those
values on our Node objects.  That means the webapp doesn't even need
to use the optimization above.  Further, we can have the webapp use
cached node ids, at which point it doesn't need to make any ZK queries
at all.

With a local setup of 6000 nodes and a localhost ZK connection (real
world times will be much higher due to network delays), this
takes the web server node list from 3 seconds to 0.009 seconds.

The CLI node list improves from 2.1 seconds to 0.8 seconds (excluding
startup time).

Change-Id: Id857556865b6ad75b9ec404bd7ef0c45e2a527bd
This commit is contained in:
James E. Blair 2022-07-12 13:57:49 -07:00
parent bd6f610113
commit cf5f63bd6f
2 changed files with 29 additions and 10 deletions

View File

@ -133,13 +133,12 @@ def node_list(zk, node_id=None):
def _get_node_values(node):
locked = "unlocked"
try:
zk.lockNode(node, blocking=False)
except Exception:
if zk.enable_cache:
if node.lock_contenders:
locked = "locked"
else:
zk.unlockNode(node)
if zk.getNodeLockContenders(node):
locked = "locked"
values = [
node.id,
node.provider,
@ -170,7 +169,8 @@ def node_list(zk, node_id=None):
objs.append(dict(zip(headers_table.keys(),
values)))
else:
for node in zk.nodeIterator():
cached_ids = zk.enable_cache
for node in zk.nodeIterator(cached_ids=cached_ids):
values = _get_node_values(node)
objs.append(dict(zip(headers_table.keys(),

View File

@ -474,7 +474,11 @@ class Node(BaseModel):
def __init__(self, id=None):
super(Node, self).__init__(id)
# Local lock object; not serialized
self.lock = None
# Cached list of lock contenders; not serialized (and possibly
# not up to date; use for status listings only).
self.lock_contenders = set()
self.cloud = None
self.provider = None
self.pool = None
@ -2250,8 +2254,7 @@ class ZooKeeper(ZooKeeperBase):
if path == self.NODE_ROOT:
return
# Ignore lock nodes
if '/lock' in path:
if path.endswith('/lock'):
return
# Ignore any non-node related events such as connection events here
@ -2261,7 +2264,23 @@ class ZooKeeper(ZooKeeperBase):
return
path = event.event_data.path
node_id = path.rsplit('/', 1)[1]
node_path = path[len(self.NODE_ROOT) + 1:]
parts = node_path.split('/')
node_id = parts[0]
if len(parts) > 1 and parts[1] == 'lock':
if len(parts) > 2:
# A lock contender is being added or removed
contender = parts[2]
old_node = self._cached_nodes.get(node_id)
if not old_node:
return
if event.event_type in (TreeEvent.NODE_ADDED,
TreeEvent.NODE_UPDATED):
old_node.lock_contenders.add(contender)
elif event.event_type == TreeEvent.NODE_REMOVED:
old_node.lock_contenders.discard(contender)
# This event was for a lock path; no further handling necessary
return
if event.event_type in (TreeEvent.NODE_ADDED, TreeEvent.NODE_UPDATED):
# Nodes with empty data are invalid so skip add or update these.