Fix error with static node reuse

An error in the slot logic was causing static nodes not to be
immediately re-enrolled after deletion.  This went unnoticed because
the cleanup worker acts as a fallback and re-enrolls them.

Correcting this logic will avoid a delay of approximately 60 seconds
in static node reuse.

Change-Id: Ib39e840555bc998058ead9b284908ff2569ebf51
This commit is contained in:
James E. Blair 2023-06-01 11:16:09 -07:00
parent c8984ed1b3
commit ffe05578ef
2 changed files with 41 additions and 8 deletions

View File

@ -63,9 +63,6 @@ class StaticNodeProvider(Provider, QuotaSupport):
# interfere with a newer version of ourselves running.
self._idle = False
def _getSlot(self, node):
    # Fetch the slot sequence stored under this node's tuple key, then
    # report the position at which the node object sits in it.
    # Neither the key lookup nor index() is guarded here, so a missing
    # key or a node that is not in the sequence raises to the caller.
    slots_for_tuple = self._node_slots[nodeTuple(node)]
    return slots_for_tuple.index(node)
def checkHost(self, static_node):
'''Check node is reachable'''
# only gather host keys if the connection type is ssh or network_cli
@ -193,7 +190,7 @@ class StaticNodeProvider(Provider, QuotaSupport):
# This can be very chatty, so we don't normally log it. It
# can be helpful when debugging tests.
# self._debugSlots(node_slots, unslotted_nodes)
self._debugSlots(node_slots, unslotted_nodes)
# Find all nodes without slot ids, store each in first available slot
for node in unslotted_nodes:
@ -498,12 +495,26 @@ class StaticNodeProvider(Provider, QuotaSupport):
# this node. When that happens the node in the node slot is
# different than the one we are processing and we can short
# circuit.
try:
existing_node_slot = self._getSlot(node)
except Exception:
existing_node_slots = self._node_slots.get(nodeTuple(node))
if existing_node_slots is None:
# We'll let config synchronization correct any slot changes
return
if node != self._node_slots[node_tuple][existing_node_slot]:
try:
existing_node_slots.index(node)
# If we found an existing node slot, that's
# unexpected, we should let resync fix anything.
return
except ValueError:
# The expected case is that the slot is occupied by
# None rather than a node object, so this is the
# normal case.
pass
try:
existing_node = existing_node_slots[node.slot]
if existing_node is not None:
# The current slot entry should be None.
return
except IndexError:
return
self.log.debug("Re-registering deleted node: %s", node_tuple)

View File

@ -70,6 +70,28 @@ class TestDriverStatic(tests.DBTestCase):
self.assertIsNone(nodes[0].shell_type)
self.assertEqual(nodes[0].slot, 0)
def test_static_reuse(self):
    '''
    Verify that a static node which has been used becomes available
    again in the same slot without help from the cleanup worker.
    '''
    config_path = self.setup_config('static-basic.yaml')
    nodepool = self.useNodepool(config_path, watermark_sleep=1)
    # Push the cleanup interval far out so the cleanup worker is
    # not what brings the node back during this test.
    nodepool.cleanup_interval = 600
    self.startPool(nodepool)

    self.log.debug("Waiting for node pre-registration")
    registered = self.waitForNodes('fake-label')
    first_node = registered[0]
    self.assertEqual(first_node.slot, 0)

    # Flip the node to USED and store the updated node.
    first_node.state = zk.USED
    self.zk.storeNode(first_node)

    self.log.debug("Waiting for node to be re-available")
    registered = self.waitForNodes('fake-label')
    self.assertEqual(registered[0].slot, 0)
def test_static_python_path(self):
'''
Test that static python-path works.