From e49cf4a0e149e852b7d58d28aac443ac274f5278 Mon Sep 17 00:00:00 2001 From: Benoit Bayszczak Date: Thu, 25 Oct 2018 12:38:44 +0200 Subject: [PATCH] Fix node leak when skipping child jobs Zuul can skip child jobs using zuul_return. However, Zuul still requests and locks nodes for these skipped jobs and never returns them. If many jobs are skipped, this can lead to a completely wedged system. The only way to recover from this is to restart the zuul-scheduler. This change fixes the leak by filtering out skipped jobs in findJobsToRequest. Change-Id: I69f7dd1fe142c5b4d8530407c3bca37fdd976597 Co-Authored-By: Tobias Henkel --- .../config/data-return/git/common-config/zuul.yaml | 8 ++++++++ zuul/model.py | 2 ++ 2 files changed, 10 insertions(+) diff --git a/tests/fixtures/config/data-return/git/common-config/zuul.yaml b/tests/fixtures/config/data-return/git/common-config/zuul.yaml index f8cc624073..608cc835b9 100644 --- a/tests/fixtures/config/data-return/git/common-config/zuul.yaml +++ b/tests/fixtures/config/data-return/git/common-config/zuul.yaml @@ -37,9 +37,17 @@ success-url: docs/index.html run: playbooks/data-return-relative.yaml +# This child job will be skipped in the test case test_data_return_child_jobs. +# In order to verify that this doesn't lead to node leaks attach a nodeset to +# it. Each test case automatically verifies that there are no open node +# requests and no locked nodes left behind. - job: name: child run: playbooks/child.yaml + nodeset: + nodes: + - name: node + label: test - job: name: several-zuul-return-parent diff --git a/zuul/model.py b/zuul/model.py index 85b5a5444f..d14173b23b 100644 --- a/zuul/model.py +++ b/zuul/model.py @@ -2178,6 +2178,8 @@ class QueueItem(object): build = build_set.getBuild(job.name) if build and (build.result == 'SUCCESS' or build.paused): successful_job_names.add(job.name) + elif build and build.result == 'SKIPPED': + pass else: nodeset = build_set.getJobNodeSet(job.name) if nodeset is None: