Auto-delete expired autohold requests

When a request is created with a node expiration, set a request
expiration for 24 hours after the nodes expire.

Change-Id: I0fbf59eb00d047e5b066d2f7347b77a48f8fb0e7
This commit is contained in:
David Shrewsbury 2019-06-06 16:16:03 -04:00
parent 6bbf3609bb
commit 9f5743366d
5 changed files with 110 additions and 0 deletions

View File

@ -2107,6 +2107,54 @@ class TestScheduler(ZuulTestCase):
self.assertEqual(".*", request['ref_filter'])
self.assertEqual("reason text", request['reason'])
@simple_layout('layouts/autohold.yaml')
def test_autohold_request_expiration(self):
    # Whatever TTL this test installs on the scheduler is put back to the
    # value seen here once the test finishes.
    saved_ttl = self.sched.EXPIRED_HOLD_REQUEST_TTL
    self.addCleanup(
        setattr, self.sched, 'EXPIRED_HOLD_REQUEST_TTL', saved_ttl)

    rpc = zuul.rpcclient.RPCClient('127.0.0.1',
                                   self.gearman_server.port)
    self.addCleanup(rpc.shutdown)

    # Register a hold request for project-test2.
    # NOTE(review): the trailing "1, 1" is presumably the hold count and
    # the node hold expiration in seconds -- confirm against
    # RPCClient.autohold.
    created = rpc.autohold('tenant-one', 'org/project', 'project-test2',
                           "", "", "reason text", 1, 1)
    self.assertTrue(created)

    # A freshly created request must not carry an expiration timestamp.
    pending = rpc.autohold_list()
    self.assertEqual(1, len(pending))
    self.assertIsNone(pending[0]['expired'])

    # A failing build of the held job uses the request, after which the
    # request is expected to be stamped as expired.
    change_a = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
    self.executor_server.failJob('project-test2', change_a)
    self.fake_gerrit.addEvent(change_a.getPatchsetCreatedEvent(1))
    self.waitUntilSettled()

    used = rpc.autohold_list()
    self.assertEqual(1, len(used))
    self.assertIsNotNone(used[0]['expired'])

    # Temporarily shorten the request TTL so the hold request becomes
    # eligible for auto-deletion, then wait long enough for both the node
    # expiration and the shortened TTL to elapse.
    self.sched.EXPIRED_HOLD_REQUEST_TTL = 1
    time.sleep(3)

    # Auto-deletion is performed while handling another failing build,
    # so trigger a second failure of the same job.
    change_b = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
    self.executor_server.failJob('project-test2', change_b)
    self.fake_gerrit.addEvent(change_b.getPatchsetCreatedEvent(1))
    self.waitUntilSettled()

    # Poll until the request list is empty.
    for _ in iterate_timeout(10, 'hold request expiration'):
        if len(rpc.autohold_list()) == 0:
            break
@simple_layout('layouts/three-projects.yaml')
def test_dependent_behind_dequeue(self):
# This particular test does a large amount of merges and needs a little

View File

@ -468,6 +468,7 @@ class Client(zuul.cmd.ZuulApp):
print("Max Count: %s" % request['max_count'])
print("Current Count: %s" % request['current_count'])
print("Node Expiration: %s" % request['node_expiration'])
# time.ctime(None) formats the *current* time, so only convert when the
# request actually carries an expiration timestamp; otherwise show None.
print("Request Expiration: %s" % (
    time.ctime(request['expired']) if request['expired'] else None))
print("Reason: %s" % request['reason'])
print("Held Nodes: %s" % request['nodes'])

View File

@ -4649,6 +4649,7 @@ class HoldRequest(object):
self.lock = None
self.stat = None
self.id = None
self.expired = None
self.tenant = None
self.project = None
self.job = None
@ -4670,6 +4671,7 @@ class HoldRequest(object):
Return a new object from the given data dictionary.
'''
obj = HoldRequest()
obj.expired = data.get('expired')
obj.tenant = data.get('tenant')
obj.project = data.get('project')
obj.job = data.get('job')
@ -4687,6 +4689,7 @@ class HoldRequest(object):
'''
d = dict()
d['id'] = self.id
d['expired'] = self.expired
d['tenant'] = self.tenant
d['project'] = self.project
d['job'] = self.job
@ -4702,6 +4705,7 @@ class HoldRequest(object):
'''
Update current object with data from the given dictionary.
'''
self.expired = d.get('expired')
self.tenant = d.get('tenant')
self.project = d.get('project')
self.job = d.get('job')

View File

@ -11,6 +11,7 @@
# under the License.
import logging
import time
from collections import defaultdict
from zuul import model
@ -194,6 +195,11 @@ class Nodepool(object):
request.nodes += [node.id for node in nodes]
request.current_count += 1
# Request has been used at least the maximum number of times so set
# the expiration time so that it can be auto-deleted.
if request.current_count >= request.max_count and not request.expired:
request.expired = time.time()
# Give ourselves a few seconds to try to obtain the lock rather than
# immediately give up.
self.sched.zk.lockHoldRequest(request, timeout=5)

View File

@ -269,6 +269,9 @@ class Scheduler(threading.Thread):
log = logging.getLogger("zuul.Scheduler")
_stats_interval = 30
# Number of seconds past node expiration that an expired hold request is
# kept before it is auto-deleted
EXPIRED_HOLD_REQUEST_TTL = 24 * 60 * 60
def __init__(self, config, testonly=False):
threading.Thread.__init__(self)
self.daemon = True
@ -1314,6 +1317,50 @@ class Scheduler(threading.Thread):
return
pipeline.manager.onBuildPaused(event.build)
def _handleExpiredHoldRequest(self, request):
    '''
    Check if a hold request is expired and delete it if it is.

    The 'expired' attribute will be set to the clock time when the
    hold request was used for the last time. If this is NOT set, then
    the request is still active.

    If a node expiration time is set on the request, and the request is
    expired, *and* we've waited for a defined period past the node
    expiration (EXPIRED_HOLD_REQUEST_TTL), then we will delete the hold
    request.

    A failure to delete the request is logged, not raised; the request
    is still reported as expired in that case.

    :param request: The hold request to check. Its ``expired`` and
        ``node_expiration`` attributes are read.

    :returns: True if it is expired, False otherwise.
    '''
    if not request.expired:
        return False

    if not request.node_expiration:
        # Request has been used up but there is no node expiration, so
        # we don't auto-delete it.
        return True

    elapsed = time.time() - request.expired
    if elapsed < self.EXPIRED_HOLD_REQUEST_TTL + request.node_expiration:
        # Haven't reached our defined expiration lifetime, so don't
        # auto-delete it yet.
        return True

    try:
        self.zk.lockHoldRequest(request)
        self.log.info("Removing expired hold request %s", request)
        self.zk.deleteHoldRequest(request)
    except Exception:
        self.log.exception(
            "Failed to delete expired hold request %s", request)
    finally:
        # Unlocking is best effort: it is attempted even when locking or
        # deleting failed above, so a failure here must not propagate.
        # Record it at debug level instead of discarding it silently.
        try:
            self.zk.unlockHoldRequest(request)
        except Exception:
            self.log.debug(
                "Unable to unlock hold request %s", request,
                exc_info=True)

    return True
def _getAutoholdRequest(self, build):
change = build.build_set.item.change
@ -1350,6 +1397,10 @@ class Scheduler(threading.Thread):
request = self.zk.getHoldRequest(request_id)
if not request:
continue
if self._handleExpiredHoldRequest(request):
continue
ref_filter = request.ref_filter
if request.current_count >= request.max_count: