Better exception handling during autohold

An autohold request can linger longer than requested if an exception
is raised while iterating over nodes. Handle that particular case
better (a node with no data is now skipped with a warning), and also
handle ANY other exception that bubbles up while processing the
autohold by deleting the autohold request when that occurs.

Change-Id: I9d64995406e86cbad7536b85a3206fda7faac253
This commit is contained in:
David Shrewsbury
2017-10-13 11:10:00 -04:00
parent 76fc525d14
commit f21bb2893a
2 changed files with 8 additions and 1 deletion

View File

@@ -855,8 +855,12 @@ class Scheduler(threading.Thread):
try:
self.nodepool.holdNodeSet(nodeset, autohold_key)
except Exception:
self.log.exception("Unable to process autohold for %s",
self.log.exception("Unable to process autohold for %s:",
autohold_key)
if autohold_key in self.autohold_requests:
self.log.debug("Removing autohold %s due to exception",
autohold_key)
del self.autohold_requests[autohold_key]
self.nodepool.returnNodeSet(nodeset)
except Exception:

View File

@@ -269,6 +269,9 @@ class ZooKeeper(object):
for nodeid in nodes:
node_path = '%s/%s' % (self.NODE_ROOT, nodeid)
node_data, node_stat = self.client.get(node_path)
if not node_data:
self.log.warning("Node ID %s has no data", nodeid)
continue
node_data = self._strToDict(node_data)
if (node_data['state'] == zuul.model.STATE_HOLD and
node_data.get('hold_job') == identifier):