Speed up node listing

This speeds up node listing in the CLI ("nodepool list") and the
webapp ("/node-list") significantly.

The main culprit is that filling in the "locked" field is expensive
because we attempt to lock each node.  To make that faster, we
now just query the lock contenders to determine whether it is locked
(if there are contenders, it's locked).

Further, in the webapp, we can use the cache more aggressively.  First,
we update the cache listener to watch lock contenders and cache those
values on our Node objects.  That means the webapp doesn't even need
to use the optimization above.  Further, we can have the webapp use
cached node ids, at which point it doesn't need to make any ZK queries
at all.

With a local setup of 6000 nodes and a localhost ZK connection (real
world times will be much higher due to network delays), this
takes the web server node list from 3 seconds to 0.009 seconds.

The CLI node list improves from 2.1 seconds to 0.8 seconds (excluding
startup time).

Change-Id: Id857556865b6ad75b9ec404bd7ef0c45e2a527bd
This commit is contained in:
James E. Blair 2022-07-12 13:57:49 -07:00
parent bd6f610113
commit cf5f63bd6f
2 changed files with 29 additions and 10 deletions

View File

@ -133,13 +133,12 @@ def node_list(zk, node_id=None):
def _get_node_values(node):
locked = "unlocked"
try:
zk.lockNode(node, blocking=False)
except Exception:
if zk.enable_cache:
if node.lock_contenders:
locked = "locked"
else:
zk.unlockNode(node)
if zk.getNodeLockContenders(node):
locked = "locked"
values = [
node.id,
node.provider,
@ -170,7 +169,8 @@ def node_list(zk, node_id=None):
objs.append(dict(zip(headers_table.keys(),
values)))
else:
for node in zk.nodeIterator():
cached_ids = zk.enable_cache
for node in zk.nodeIterator(cached_ids=cached_ids):
values = _get_node_values(node)
objs.append(dict(zip(headers_table.keys(),

View File

@ -474,7 +474,11 @@ class Node(BaseModel):
def __init__(self, id=None):
super(Node, self).__init__(id)
# Local lock object; not serialized
self.lock = None
# Cached list of lock contenders; not serialized (and possibly
# not up to date; use for status listings only).
self.lock_contenders = set()
self.cloud = None
self.provider = None
self.pool = None
@ -2250,8 +2254,7 @@ class ZooKeeper(ZooKeeperBase):
if path == self.NODE_ROOT:
return
# Ignore lock nodes
if '/lock' in path:
if path.endswith('/lock'):
return
# Ignore any non-node related events such as connection events here
@ -2261,7 +2264,23 @@ class ZooKeeper(ZooKeeperBase):
return
path = event.event_data.path
node_id = path.rsplit('/', 1)[1]
node_path = path[len(self.NODE_ROOT) + 1:]
parts = node_path.split('/')
node_id = parts[0]
if len(parts) > 1 and parts[1] == 'lock':
if len(parts) > 2:
# A lock contender is being added or removed
contender = parts[2]
old_node = self._cached_nodes.get(node_id)
if not old_node:
return
if event.event_type in (TreeEvent.NODE_ADDED,
TreeEvent.NODE_UPDATED):
old_node.lock_contenders.add(contender)
elif event.event_type == TreeEvent.NODE_REMOVED:
old_node.lock_contenders.discard(contender)
# This event was for a lock path; no further handling necessary
return
if event.event_type in (TreeEvent.NODE_ADDED, TreeEvent.NODE_UPDATED):
# Nodes with empty data are invalid so skip add or update these.