From f21bb2893ab6c1650ef896e9144b22d5177fe900 Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Fri, 13 Oct 2017 11:10:00 -0400 Subject: [PATCH] Better exception handling during autohold Our autohold can linger longer than we requested if we get an exception during node iteration. Let's handle that particular exception better, and also handle ANY exceptions that may bubble up by deleting the autohold if that occurs. Change-Id: I9d64995406e86cbad7536b85a3206fda7faac253 --- zuul/scheduler.py | 6 +++++- zuul/zk.py | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/zuul/scheduler.py b/zuul/scheduler.py index cfcd865c47..bca62dc021 100644 --- a/zuul/scheduler.py +++ b/zuul/scheduler.py @@ -855,8 +855,12 @@ class Scheduler(threading.Thread): try: self.nodepool.holdNodeSet(nodeset, autohold_key) except Exception: - self.log.exception("Unable to process autohold for %s", + self.log.exception("Unable to process autohold for %s:", autohold_key) + if autohold_key in self.autohold_requests: + self.log.debug("Removing autohold %s due to exception", + autohold_key) + del self.autohold_requests[autohold_key] self.nodepool.returnNodeSet(nodeset) except Exception: diff --git a/zuul/zk.py b/zuul/zk.py index 2fca749cd5..ede78bec7f 100644 --- a/zuul/zk.py +++ b/zuul/zk.py @@ -269,6 +269,9 @@ class ZooKeeper(object): for nodeid in nodes: node_path = '%s/%s' % (self.NODE_ROOT, nodeid) node_data, node_stat = self.client.get(node_path) + if not node_data: + self.log.warning("Node ID %s has no data", nodeid) + continue node_data = self._strToDict(node_data) if (node_data['state'] == zuul.model.STATE_HOLD and node_data.get('hold_job') == identifier):