Re-register missing nodes in static driver
The cleanup hook will take care of re-registering previously offline nodes (e.g. because of a failed liveness check). In addition the current state is now always checked in Zookeeper by removing the local dict with static nodes. Change-Id: Iae4488dcfb33d196953ae6f1b166b9dd3f81d998
This commit is contained in:
parent
47233f434f
commit
e0919ab2cd
|
@ -33,7 +33,6 @@ class StaticNodeProvider(Provider):
|
||||||
|
|
||||||
def __init__(self, provider, *args):
|
def __init__(self, provider, *args):
|
||||||
self.provider = provider
|
self.provider = provider
|
||||||
self.static_nodes = {}
|
|
||||||
|
|
||||||
def checkHost(self, node):
|
def checkHost(self, node):
|
||||||
'''Check node is reachable'''
|
'''Check node is reachable'''
|
||||||
|
@ -217,34 +216,37 @@ class StaticNodeProvider(Provider):
|
||||||
finally:
|
finally:
|
||||||
self.zk.unlockNode(node)
|
self.zk.unlockNode(node)
|
||||||
|
|
||||||
|
def syncNodeCount(self, registered, node, pool):
|
||||||
|
current_count = registered[node["name"]]
|
||||||
|
|
||||||
|
# Register nodes to synchronize with our configuration.
|
||||||
|
if current_count < node["max-parallel-jobs"]:
|
||||||
|
register_cnt = node["max-parallel-jobs"] - current_count
|
||||||
|
self.registerNodeFromConfig(
|
||||||
|
register_cnt, self.provider.name, pool.name, node)
|
||||||
|
|
||||||
|
# De-register nodes to synchronize with our configuration.
|
||||||
|
# This case covers an existing node, but with a decreased
|
||||||
|
# max-parallel-jobs value.
|
||||||
|
elif current_count > node["max-parallel-jobs"]:
|
||||||
|
deregister_cnt = current_count - node["max-parallel-jobs"]
|
||||||
|
try:
|
||||||
|
self.deregisterNode(deregister_cnt, node["name"])
|
||||||
|
except Exception:
|
||||||
|
self.log.exception("Couldn't deregister static node:")
|
||||||
|
|
||||||
def _start(self, zk_conn):
|
def _start(self, zk_conn):
|
||||||
self.zk = zk_conn
|
self.zk = zk_conn
|
||||||
registered = self.getRegisteredNodeHostnames()
|
registered = self.getRegisteredNodeHostnames()
|
||||||
|
|
||||||
|
static_nodes = {}
|
||||||
for pool in self.provider.pools.values():
|
for pool in self.provider.pools.values():
|
||||||
for node in pool.nodes:
|
for node in pool.nodes:
|
||||||
current_count = registered[node["name"]]
|
try:
|
||||||
|
self.syncNodeCount(registered, node, pool)
|
||||||
# Register nodes to synchronize with our configuration.
|
except Exception:
|
||||||
if current_count < node["max-parallel-jobs"]:
|
self.log.exception("Couldn't sync node:")
|
||||||
register_cnt = node["max-parallel-jobs"] - current_count
|
continue
|
||||||
try:
|
|
||||||
self.registerNodeFromConfig(
|
|
||||||
register_cnt, self.provider.name, pool.name, node)
|
|
||||||
except Exception:
|
|
||||||
self.log.exception("Couldn't register static node:")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# De-register nodes to synchronize with our configuration.
|
|
||||||
# This case covers an existing node, but with a decreased
|
|
||||||
# max-parallel-jobs value.
|
|
||||||
elif current_count > node["max-parallel-jobs"]:
|
|
||||||
deregister_cnt = current_count - node["max-parallel-jobs"]
|
|
||||||
try:
|
|
||||||
self.deregisterNode(deregister_cnt, node["name"])
|
|
||||||
except Exception:
|
|
||||||
self.log.exception("Couldn't deregister static node:")
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.updateNodeFromConfig(node)
|
self.updateNodeFromConfig(node)
|
||||||
|
@ -252,13 +254,13 @@ class StaticNodeProvider(Provider):
|
||||||
self.log.exception("Couldn't update static node:")
|
self.log.exception("Couldn't update static node:")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self.static_nodes[node["name"]] = node
|
static_nodes[node["name"]] = node
|
||||||
|
|
||||||
# De-register nodes to synchronize with our configuration.
|
# De-register nodes to synchronize with our configuration.
|
||||||
# This case covers any registered nodes that no longer appear in
|
# This case covers any registered nodes that no longer appear in
|
||||||
# the config.
|
# the config.
|
||||||
for hostname in list(registered):
|
for hostname in list(registered):
|
||||||
if hostname not in self.static_nodes:
|
if hostname not in static_nodes:
|
||||||
try:
|
try:
|
||||||
self.deregisterNode(registered[hostname], hostname)
|
self.deregisterNode(registered[hostname], hostname)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
@ -275,11 +277,20 @@ class StaticNodeProvider(Provider):
|
||||||
self.log.debug("Stopping")
|
self.log.debug("Stopping")
|
||||||
|
|
||||||
def listNodes(self):
|
def listNodes(self):
|
||||||
|
registered = self.getRegisteredNodeHostnames()
|
||||||
servers = []
|
servers = []
|
||||||
for node in self.static_nodes.values():
|
for pool in self.provider.pools.values():
|
||||||
servers.append(node)
|
for node in pool.nodes:
|
||||||
|
if node["name"] in registered:
|
||||||
|
servers.append(node)
|
||||||
return servers
|
return servers
|
||||||
|
|
||||||
|
def poolNodes(self):
|
||||||
|
nodes = {}
|
||||||
|
for pool in self.provider.pools.values():
|
||||||
|
nodes.update({n["name"]: n for n in pool.nodes})
|
||||||
|
return nodes
|
||||||
|
|
||||||
def cleanupNode(self, server_id):
|
def cleanupNode(self, server_id):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -293,7 +304,14 @@ class StaticNodeProvider(Provider):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def cleanupLeakedResources(self):
|
def cleanupLeakedResources(self):
|
||||||
pass
|
registered = self.getRegisteredNodeHostnames()
|
||||||
|
for pool in self.provider.pools.values():
|
||||||
|
for node in pool.nodes:
|
||||||
|
try:
|
||||||
|
self.syncNodeCount(registered, node, pool)
|
||||||
|
except Exception:
|
||||||
|
self.log.exception("Couldn't sync node:")
|
||||||
|
continue
|
||||||
|
|
||||||
def getRequestHandler(self, poolworker, request):
|
def getRequestHandler(self, poolworker, request):
|
||||||
return StaticNodeRequestHandler(poolworker, request)
|
return StaticNodeRequestHandler(poolworker, request)
|
||||||
|
@ -304,10 +322,10 @@ class StaticNodeProvider(Provider):
|
||||||
'''
|
'''
|
||||||
# It's possible a deleted node no longer exists in our config, so
|
# It's possible a deleted node no longer exists in our config, so
|
||||||
# don't bother to reregister.
|
# don't bother to reregister.
|
||||||
if node.hostname not in self.static_nodes:
|
static_node = self.poolNodes().get(node.hostname)
|
||||||
|
if static_node is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
static_node = self.static_nodes[node.hostname]
|
|
||||||
try:
|
try:
|
||||||
registered = self.getRegisteredNodeHostnames()
|
registered = self.getRegisteredNodeHostnames()
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
@ -277,3 +277,19 @@ class TestDriverStatic(tests.DBTestCase):
|
||||||
new_nodes = self.waitForNodes('fake-label')
|
new_nodes = self.waitForNodes('fake-label')
|
||||||
self.assertEqual(len(new_nodes), 1)
|
self.assertEqual(len(new_nodes), 1)
|
||||||
self.assertEqual(nodes[0].hostname, new_nodes[0].hostname)
|
self.assertEqual(nodes[0].hostname, new_nodes[0].hostname)
|
||||||
|
|
||||||
|
def test_missing_static_node(self):
|
||||||
|
"""Test that a missing static node is added"""
|
||||||
|
configfile = self.setup_config('static-2-nodes.yaml')
|
||||||
|
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||||
|
pool.start()
|
||||||
|
|
||||||
|
self.log.debug("Waiting for initial nodes")
|
||||||
|
nodes = self.waitForNodes('fake-label', 2)
|
||||||
|
self.assertEqual(len(nodes), 2)
|
||||||
|
|
||||||
|
self.zk.deleteNode(nodes[0])
|
||||||
|
|
||||||
|
self.log.debug("Waiting for node to transition to ready again")
|
||||||
|
nodes = self.waitForNodes('fake-label', 2)
|
||||||
|
self.assertEqual(len(nodes), 2)
|
||||||
|
|
Loading…
Reference in New Issue