Fix repo state restore / Keep jobgraphs frozen

This is two changes squashed.  First:

Fix missing repo state restore

The global repo state handling does not restore the repo states of
projects that are not part of the dependency chain. This can be fixed
generically by ensuring that the repo state is restored immediately
after cloning into the job workspace.

Original Change-Id: I61db67edb3952cdba7709b5b597dac93be4b6dde

Second:

Keep jobgraphs frozen across reconfiguration

This removes test cases which are no longer relevant.

Many of these were testing various mutations across reconfigurations,
but with job graphs frozen, about the only thing that we expect to
change now is when a pipeline, project, or tenant is deleted.  Test
cases are modified or added to test these.

It appears even the current code may have some bugs related to deleting
pipelines and tenants.  The improved testing in this change highlighted
that.  The scheduler is updated to ensure that it cancels all jobs on
pipelines or tenants that are removed from a running configuration.  This
should ensure we don't leak nodes or semaphores.

Change-Id: I2e4bd2fb9222b49cb10661d28d4c52a3c994ba62
Co-Authored-By: James E. Blair <jim@acmegating.com>
This commit is contained in:
Tobias Henkel 2021-04-08 08:38:19 +02:00 committed by James E. Blair
parent f7f689c87d
commit fc11cf1334
21 changed files with 342 additions and 1040 deletions

View File

@ -2999,6 +2999,14 @@ class FakeSqlConnection(sqlconnection.SQLConnection):
class RecordingAnsibleJob(zuul.executor.server.AnsibleJob):
result = None
def _execute(self):
for _ in iterate_timeout(60, 'wait for merge'):
if not self.executor_server.hold_jobs_in_start:
break
time.sleep(1)
super()._execute()
def doMergeChanges(self, merger, items, repo_state):
# Get a merger in order to update the repos involved in this job.
commit = super(RecordingAnsibleJob, self).doMergeChanges(
@ -3006,11 +3014,6 @@ class RecordingAnsibleJob(zuul.executor.server.AnsibleJob):
if not commit: # merge conflict
self.recordResult('MERGER_FAILURE')
for _ in iterate_timeout(60, 'wait for merge'):
if not self.executor_server.hold_jobs_in_start:
break
time.sleep(1)
return commit
def recordResult(self, result):

View File

@ -1,4 +0,0 @@
- project:
check:
jobs:
- compile

View File

@ -1,16 +1,3 @@
- pipeline:
name: check
manager: independent
trigger:
gerrit:
- event: patchset-created
success:
gerrit:
Verified: 1
failure:
gerrit:
Verified: -1
- job:
name: base
parent: null
@ -20,6 +7,3 @@
- project:
name: org/project
check:
jobs:
- project-test1

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,8 @@
- tenant:
name: tenant-one
source:
gerrit:
config-projects:
- common-config
untrusted-projects:
- org/project

View File

@ -1,61 +0,0 @@
- pipeline:
name: check
manager: independent
trigger:
gerrit:
- event: patchset-created
success:
gerrit:
Verified: 1
failure:
gerrit:
Verified: -1
- job:
name: base
parent: null
run: playbooks/run.yaml
- job:
name: project-merge
- job:
name: project-test1
- job:
name: project-test2
- job:
name: project-test3
- job:
name: project-test4
- job:
name: project-test5
nodeset:
nodes:
- name: controller
label: label1
- job:
name: project-test6
- project:
name: org/project
check:
fail-fast: true
jobs:
- project-merge
- project-test2:
dependencies: project-merge
- project-test3:
dependencies:
- name: project-test2
soft: true
- project-test4:
dependencies: project-test2
- project-test5
- project-test6:
dependencies: project-merge
voting: false

View File

@ -1,67 +0,0 @@
- pipeline:
name: gate
manager: dependent
failure-message: Build failed. For information on how to proceed, see http://wiki.example.org/Test_Failures
trigger:
gerrit:
- event: comment-added
approval:
- Approved: 1
success:
gerrit:
Verified: 2
submit: true
failure:
gerrit:
Verified: -2
start:
gerrit:
Verified: 0
precedence: high
- job:
name: base
parent: null
run: playbooks/base.yaml
- job:
name: project-merge
hold-following-changes: true
run: playbooks/project-merge.yaml
- job:
name: project-test1
run: playbooks/project-test1.yaml
- job:
name: project-test2
run: playbooks/project-test2.yaml
- job:
name: project-test3
run: playbooks/project-test3.yaml
- job:
name: project-testfile
files:
- .*-requires
run: playbooks/project-testfile.yaml
- project:
name: org/project
merge-mode: cherry-pick
gate:
jobs:
- project-merge
- project-test1:
dependencies:
- project-merge
- project-test2:
dependencies:
- project-merge
- project-test3:
dependencies:
- project-merge
- project-testfile:
dependencies:
- project-merge

View File

@ -0,0 +1,25 @@
- job:
name: base
parent: null
run: playbooks/base.yaml
- job:
name: project-merge
hold-following-changes: true
run: playbooks/project-merge.yaml
- job:
name: project-test1
run: playbooks/project-test1.yaml
- job:
name: project-test2
run: playbooks/project-test2.yaml
- job:
name: project-testfile
run: playbooks/project-testfile.yaml
- project:
name: org/project
merge-mode: cherry-pick

View File

@ -1,43 +0,0 @@
- pipeline:
name: check
manager: independent
trigger:
gerrit:
- event: patchset-created
success:
gerrit:
Verified: 1
failure:
gerrit:
Verified: -1
- job:
name: base
parent: null
run: playbooks/base.yaml
- job:
name: project-merge
hold-following-changes: true
run: playbooks/project-merge.yaml
- job:
name: project-test2
run: playbooks/project-test2.yaml
- job:
name: project-testfile
run: playbooks/project-testfile.yaml
- project:
name: org/project
merge-mode: cherry-pick
check:
jobs:
- project-merge
- project-test2:
dependencies:
- project-merge
- project-testfile:
dependencies:
- project-merge

View File

@ -1,93 +0,0 @@
- pipeline:
name: check
manager: independent
trigger:
gerrit:
- event: patchset-created
success:
gerrit:
Verified: 1
failure:
gerrit:
Verified: -1
- pipeline:
name: gate
manager: dependent
failure-message: Build failed. For information on how to proceed, see http://wiki.example.org/Test_Failures
trigger:
gerrit:
- event: comment-added
approval:
- Approved: 1
success:
gerrit:
Verified: 2
submit: true
failure:
gerrit:
Verified: -2
start:
gerrit:
Verified: 0
precedence: high
- job:
name: base
parent: null
run: playbooks/base.yaml
- job:
name: project-merge
hold-following-changes: true
run: playbooks/project-merge.yaml
- job:
name: project-test1
run: playbooks/project-test1.yaml
- job:
name: project-test2
run: playbooks/project-test2.yaml
- job:
name: project1-project2-integration
run: playbooks/project1-project2-integration.yaml
- project:
name: org/project1
check:
jobs:
- project-merge
- project-test1:
dependencies: project-merge
- project-test2:
dependencies: project-merge
gate:
jobs:
- project-merge
- project-test1:
dependencies: project-merge
- project-test2:
dependencies: project-merge
- project:
name: org/project2
check:
jobs:
- project-merge
- project-test1:
dependencies: project-merge
- project-test2:
dependencies: project-merge
- project1-project2-integration:
dependencies: project-merge
gate:
jobs:
- project-merge
- project-test1:
dependencies: project-merge
- project-test2:
dependencies: project-merge
- project1-project2-integration:
dependencies: project-merge

View File

@ -1,95 +0,0 @@
- pipeline:
name: check
manager: independent
trigger:
gerrit:
- event: patchset-created
success:
gerrit:
Verified: 1
failure:
gerrit:
Verified: -1
- pipeline:
name: gate
manager: dependent
failure-message: Build failed. For information on how to proceed, see http://wiki.example.org/Test_Failures
trigger:
gerrit:
- event: comment-added
approval:
- Approved: 1
success:
gerrit:
Verified: 2
submit: true
failure:
gerrit:
Verified: -2
start:
gerrit:
Verified: 0
precedence: high
- job:
name: base
parent: null
run: playbooks/base.yaml
- job:
name: project-merge
hold-following-changes: true
run: playbooks/project-merge.yaml
- job:
name: project-test1
run: playbooks/project-test1.yaml
- job:
name: project-test2
run: playbooks/project-test2.yaml
- job:
name: project1-project2-integration
run: playbooks/project1-project2-integration.yaml
- project:
name: org/project1
check:
jobs:
- project-merge
- project-test1:
dependencies: project-merge
- project-test2:
dependencies: project-merge
gate:
queue: integrated
jobs:
- project-merge
- project-test1:
dependencies: project-merge
- project-test2:
dependencies: project-merge
- project:
name: org/project2
check:
jobs:
- project-merge
- project-test1:
dependencies: project-merge
- project-test2:
dependencies: project-merge
- project1-project2-integration:
dependencies: project-merge
gate:
queue: integrated
jobs:
- project-merge
- project-test1:
dependencies: project-merge
- project-test2:
dependencies: project-merge
- project1-project2-integration:
dependencies: project-merge

View File

@ -0,0 +1,27 @@
- pipeline:
name: periodic
manager: independent
# Trigger is required, set it to one that is a noop
# during tests that check the timer trigger.
trigger:
github:
- event: pull_request
- job:
name: base
parent: null
run: playbooks/base.yaml
- job:
name: project-bitrot
nodeset:
nodes:
- name: static
label: ubuntu-xenial
run: playbooks/project-bitrot.yaml
- project:
name: org/project
periodic:
jobs:
- project-bitrot

View File

@ -1,41 +0,0 @@
- pipeline:
name: gate
manager: dependent
trigger:
gerrit:
- event: comment-added
approval:
- Approved: 1
start:
gerrit:
Verified: 0
success:
gerrit:
Verified: 2
submit: true
failure:
gerrit:
Verified: -2
- job:
name: base
parent: null
nodeset:
nodes:
- label: ubuntu-xenial
name: controller
- job:
name: job1
run: playbooks/job1.yaml
- job:
name: job2
run: playbooks/job2.yaml
- project:
name: org/project
gate:
jobs:
- job1
- job2

View File

@ -1,40 +0,0 @@
- pipeline:
name: gate
manager: dependent
trigger:
gerrit:
- event: comment-added
approval:
- Approved: 1
start:
gerrit:
Verified: 0
success:
gerrit:
Verified: 2
submit: true
failure:
gerrit:
Verified: -2
- job:
name: base
parent: null
nodeset:
nodes:
- label: ubuntu-xenial
name: controller
- job:
name: job1
run: playbooks/job1.yaml
- job:
name: job2
run: playbooks/job2.yaml
- project:
name: org/project
gate:
jobs:
- job1

View File

@ -350,7 +350,7 @@ class TestGithubDriver(ZuulTestCase):
# Stop queuing timer triggered jobs so that the assertions
# below don't race against more jobs being queued.
self.commitConfigUpdate('org/common-config',
'layouts/basic-github.yaml')
'layouts/no-timer-github.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
# If APScheduler is in mid-event when we remove the job, we

View File

@ -2437,8 +2437,9 @@ class TestScheduler(ZuulTestCase):
tenant = self.scheds.first.sched.abide.tenants.get('tenant-one')
trusted, project = tenant.getProject('org/project')
url = self.fake_gerrit.getGitUrl(project)
self.executor_server.merger._addProject('review.example.com',
'org/project', url, None, None)
self.executor_server.merger._addProject(
'review.example.com', 'gerrit', 'org/project', url, None, None,
None)
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
A.addPatchset(large=True)
# TODOv3(jeblair): add hostname to upstream root
@ -3004,32 +3005,6 @@ class TestScheduler(ZuulTestCase):
self.assertRaises(Exception, self.assertReportedStat,
'test-gauge.1_2_3_4', '12', 'g')
def test_stuck_job_cleanup(self):
"Test that pending jobs are cleaned up if removed from layout"
# We want to hold the project-merge job that the fake change enqueues
self.gearman_server.hold_jobs_in_queue = True
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
A.addApproval('Code-Review', 2)
self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
self.waitUntilSettled()
# The assertion is that we have one job in the queue, project-merge
self.assertEqual(len(self.gearman_server.getQueue()), 1)
self.commitConfigUpdate('common-config', 'layouts/no-jobs.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
self.gearman_server.release('gate-noop')
self.waitUntilSettled()
# asserting that project-merge is removed from queue
self.assertEqual(len(self.gearman_server.getQueue()), 0)
self.assertTrue(self.scheds.first.sched._areAllBuildsComplete())
self.assertEqual(len(self.history), 1)
self.assertEqual(self.history[0].name, 'gate-noop')
self.assertEqual(self.history[0].result, 'SUCCESS')
def test_file_head(self):
# This is a regression test for an observed bug. A change
# with a file named "HEAD" in the root directory of the repo
@ -3472,287 +3447,6 @@ class TestScheduler(ZuulTestCase):
else:
time.sleep(0)
def test_live_reconfiguration_abort(self):
# Raise an exception during reconfiguration and verify we
# still function.
self.executor_server.hold_jobs_in_build = True
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
A.addApproval('Code-Review', 2)
self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
self.waitUntilSettled()
tenant = self.scheds.first.sched.abide.tenants.get('tenant-one')
pipeline = tenant.layout.pipelines['gate']
change = pipeline.getAllItems()[0].change
# Set this to an invalid value to cause an exception during
# reconfiguration.
change.branch = None
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
self.waitUntilSettled()
self.assertEqual(self.getJobFromHistory('project-merge').result,
'ABORTED')
self.assertEqual(A.data['status'], 'NEW')
# The final report fails because of the invalid value set above.
self.assertEqual(A.reported, 1)
def test_live_reconfiguration_merge_conflict(self):
# A real-world bug: a change in a gate queue has a merge
# conflict and a job is added to its project while it's
# sitting in the queue. The job gets added to the change and
# enqueued and the change gets stuck.
self.executor_server.hold_jobs_in_build = True
# This change is fine. It's here to stop the queue long
# enough for the next change to be subject to the
# reconfiguration, as well as to provide a conflict for the
# next change. This change will succeed and merge.
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
A.addPatchset({'conflict': 'A'})
A.addApproval('Code-Review', 2)
# This change will be in merge conflict. During the
# reconfiguration, we will add a job. We want to make sure
# that doesn't cause it to get stuck.
B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
B.addPatchset({'conflict': 'B'})
B.addApproval('Code-Review', 2)
self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
self.fake_gerrit.addEvent(B.addApproval('Approved', 1))
self.waitUntilSettled()
# No jobs have run yet
self.assertEqual(A.data['status'], 'NEW')
self.assertEqual(A.reported, 1)
self.assertEqual(B.data['status'], 'NEW')
self.assertEqual(len(self.history), 0)
# Add the "project-test3" job.
self.commitConfigUpdate('common-config',
'layouts/live-reconfiguration-add-job.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
self.waitUntilSettled()
self.assertEqual(A.data['status'], 'MERGED')
self.assertEqual(A.reported, 2)
self.assertEqual(B.data['status'], 'NEW')
self.assertIn('Merge Failed', B.messages[-1])
self.assertEqual(self.getJobFromHistory('project-merge').result,
'SUCCESS')
self.assertEqual(self.getJobFromHistory('project-test1').result,
'SUCCESS')
self.assertEqual(self.getJobFromHistory('project-test2').result,
'SUCCESS')
self.assertEqual(self.getJobFromHistory('project-test3').result,
'SUCCESS')
self.assertEqual(len(self.history), 4)
def test_live_reconfiguration_failed_root(self):
# An extrapolation of test_live_reconfiguration_merge_conflict
# that tests a job added to a job tree with a failed root does
# not run.
self.executor_server.hold_jobs_in_build = True
# This change is fine. It's here to stop the queue long
# enough for the next change to be subject to the
# reconfiguration. This change will succeed and merge.
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
A.addPatchset({'conflict': 'A'})
A.addApproval('Code-Review', 2)
self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
self.waitUntilSettled()
self.executor_server.release('.*-merge')
self.waitUntilSettled()
B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
self.executor_server.failJob('project-merge', B)
B.addApproval('Code-Review', 2)
self.fake_gerrit.addEvent(B.addApproval('Approved', 1))
self.waitUntilSettled()
self.executor_server.release('.*-merge')
self.waitUntilSettled()
# Both -merge jobs have run, but no others.
self.assertEqual(A.data['status'], 'NEW')
self.assertEqual(A.reported, 1)
self.assertEqual(B.data['status'], 'NEW')
self.assertEqual(B.reported, 1)
self.assertEqual(self.history[0].result, 'SUCCESS')
self.assertEqual(self.history[0].name, 'project-merge')
self.assertEqual(self.history[1].result, 'FAILURE')
self.assertEqual(self.history[1].name, 'project-merge')
self.assertEqual(len(self.history), 2)
# Add the "project-test3" job.
self.commitConfigUpdate('common-config',
'layouts/live-reconfiguration-add-job.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
self.waitUntilSettled()
self.assertEqual(A.data['status'], 'MERGED')
self.assertEqual(A.reported, 2)
self.assertEqual(B.data['status'], 'NEW')
self.assertEqual(B.reported, 2)
self.assertEqual(self.history[0].result, 'SUCCESS')
self.assertEqual(self.history[0].name, 'project-merge')
self.assertEqual(self.history[1].result, 'FAILURE')
self.assertEqual(self.history[1].name, 'project-merge')
self.assertEqual(self.history[2].result, 'SUCCESS')
self.assertEqual(self.history[3].result, 'SUCCESS')
self.assertEqual(self.history[4].result, 'SUCCESS')
self.assertEqual(len(self.history), 5)
def test_live_reconfiguration_failed_job(self):
# Test that a change with a removed failing job does not
# disrupt reconfiguration. If a change has a failed job and
# that job is removed during a reconfiguration, we observed a
# bug where the code to re-set build statuses would run on
# that build and raise an exception because the job no longer
# existed.
self.executor_server.hold_jobs_in_build = True
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
# This change will fail and later be removed by the reconfiguration.
self.executor_server.failJob('project-test1', A)
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.executor_server.release('.*-merge')
self.waitUntilSettled()
self.executor_server.release('project-test1')
self.waitUntilSettled()
self.assertEqual(A.data['status'], 'NEW')
self.assertEqual(A.reported, 0)
self.assertEqual(self.getJobFromHistory('project-merge').result,
'SUCCESS')
self.assertEqual(self.getJobFromHistory('project-test1').result,
'FAILURE')
self.assertEqual(len(self.history), 2)
# Remove the test1 job.
self.commitConfigUpdate('common-config',
'layouts/live-reconfiguration-failed-job.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
self.waitUntilSettled()
self.assertEqual(self.getJobFromHistory('project-test2').result,
'SUCCESS')
self.assertEqual(self.getJobFromHistory('project-testfile').result,
'SUCCESS')
self.assertEqual(len(self.history), 4)
self.assertEqual(A.data['status'], 'NEW')
self.assertEqual(A.reported, 1)
self.assertIn('Build succeeded', A.messages[0])
# Ensure the removed job was not included in the report.
self.assertNotIn('project-test1', A.messages[0])
def test_live_reconfiguration_shared_queue(self):
# Test that a change with a failing job which was removed from
# this project but otherwise still exists in the system does
# not disrupt reconfiguration.
self.executor_server.hold_jobs_in_build = True
A = self.fake_gerrit.addFakeChange('org/project1', 'master', 'A')
self.executor_server.failJob('project1-project2-integration', A)
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.executor_server.release('.*-merge')
self.waitUntilSettled()
self.executor_server.release('project1-project2-integration')
self.waitUntilSettled()
self.assertEqual(A.data['status'], 'NEW')
self.assertEqual(A.reported, 0)
self.assertEqual(self.getJobFromHistory('project-merge').result,
'SUCCESS')
self.assertEqual(self.getJobFromHistory(
'project1-project2-integration').result, 'FAILURE')
self.assertEqual(len(self.history), 2)
# Remove the integration job.
self.commitConfigUpdate(
'common-config',
'layouts/live-reconfiguration-shared-queue.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
self.waitUntilSettled()
self.assertEqual(self.getJobFromHistory('project-merge').result,
'SUCCESS')
self.assertEqual(self.getJobFromHistory('project-test1').result,
'SUCCESS')
self.assertEqual(self.getJobFromHistory('project-test2').result,
'SUCCESS')
self.assertEqual(self.getJobFromHistory(
'project1-project2-integration').result, 'FAILURE')
self.assertEqual(len(self.history), 4)
self.assertEqual(A.data['status'], 'NEW')
self.assertEqual(A.reported, 1)
self.assertIn('Build succeeded', A.messages[0])
# Ensure the removed job was not included in the report.
self.assertNotIn('project1-project2-integration', A.messages[0])
def test_live_reconfiguration_shared_queue_removed(self):
# Test that changes in a shared queue survive a change of the
# queue during reconfiguration. This is a regression test
# for the dependent pipeline manager.
self.executor_server.hold_jobs_in_build = True
A = self.fake_gerrit.addFakeChange('org/project1', 'master', 'A')
B = self.fake_gerrit.addFakeChange('org/project2', 'master', 'B')
A.addApproval('Code-Review', 2)
B.addApproval('Code-Review', 2)
self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
self.fake_gerrit.addEvent(B.addApproval('Approved', 1))
self.waitUntilSettled()
# Remove the integration job.
self.commitConfigUpdate(
'common-config',
'layouts/live-reconfiguration-shared-queue-removed.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
self.waitUntilSettled()
self.assertEqual(A.data['status'], 'MERGED')
self.assertEqual(B.data['status'], 'MERGED')
def test_double_live_reconfiguration_shared_queue(self):
# This was a real-world regression. A change is added to
# gate; a reconfigure happens, a second change which depends
@ -3801,26 +3495,22 @@ class TestScheduler(ZuulTestCase):
self.assertEqual(B.reported, 2)
def test_live_reconfiguration_del_project(self):
# Test project deletion from layout
# while changes are enqueued
# Test project deletion from tenant while changes are enqueued
self.executor_server.hold_jobs_in_build = True
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
B = self.fake_gerrit.addFakeChange('org/project1', 'master', 'B')
C = self.fake_gerrit.addFakeChange('org/project1', 'master', 'C')
# A Depends-On: B
A.data['commitMessage'] = '%s\n\nDepends-On: %s\n' % (
A.subject, B.data['id'])
self.fake_gerrit.addEvent(B.addApproval('Approved', 1))
self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
self.fake_gerrit.addEvent(C.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.executor_server.release('.*-merge')
self.waitUntilSettled()
self.assertEqual(len(self.builds), 5)
self.assertEqual(len(self.builds), 8)
self.newTenantConfig('config/single-tenant/main-one-project.yaml')
# This layout defines only org/project, not org/project1
self.commitConfigUpdate(
'common-config',
@ -3828,20 +3518,10 @@ class TestScheduler(ZuulTestCase):
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
# Builds for C aborted, builds for A succeed,
# and have change B applied ahead
job_c = self.getJobFromHistory('project-test1')
self.assertEqual(job_c.changes, '3,1')
self.assertEqual(job_c.result, 'ABORTED')
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
self.waitUntilSettled()
self.assertEqual(
self.getJobFromHistory('project-test1', 'org/project').changes,
'2,1 1,1')
self.assertEqual(A.data['status'], 'NEW')
self.assertEqual(B.data['status'], 'NEW')
self.assertEqual(C.data['status'], 'NEW')
@ -3849,10 +3529,111 @@ class TestScheduler(ZuulTestCase):
self.assertEqual(B.reported, 0)
self.assertEqual(C.reported, 0)
self.assertHistory([
dict(name='project-merge', result='SUCCESS', changes='2,1'),
dict(name='project-merge', result='SUCCESS', changes='1,1'),
dict(name='project-merge', result='SUCCESS', changes='3,1'),
dict(name='project-test1', result='ABORTED', changes='2,1'),
dict(name='project-test2', result='ABORTED', changes='2,1'),
dict(name='project1-project2-integration',
result='ABORTED', changes='2,1'),
dict(name='project-test1', result='ABORTED', changes='3,1'),
dict(name='project-test2', result='ABORTED', changes='3,1'),
dict(name='project1-project2-integration',
result='ABORTED', changes='3,1'),
dict(name='project-test1', result='SUCCESS', changes='1,1'),
dict(name='project-test2', result='SUCCESS', changes='1,1'),
], ordered=False)
tenant = self.scheds.first.sched.abide.tenants.get('tenant-one')
self.assertEqual(len(tenant.layout.pipelines['check'].queues), 0)
self.assertIn('Build succeeded', A.messages[0])
def test_live_reconfiguration_del_pipeline(self):
# Test pipeline deletion while changes are enqueued
self.executor_server.hold_jobs_in_build = True
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.executor_server.release('.*-merge')
self.waitUntilSettled()
self.assertEqual(len(self.builds), 2)
# This layout defines no pipelines, so the pipeline holding A is removed
self.commitConfigUpdate(
'common-config',
'layouts/live-reconfiguration-del-pipeline.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
self.waitUntilSettled()
self.assertEqual(A.data['status'], 'NEW')
self.assertEqual(A.reported, 0)
self.assertHistory([
dict(name='project-merge', result='SUCCESS', changes='1,1'),
dict(name='project-test1', result='ABORTED', changes='1,1'),
dict(name='project-test2', result='ABORTED', changes='1,1'),
], ordered=False)
tenant = self.scheds.first.sched.abide.tenants.get('tenant-one')
self.assertEqual(len(tenant.layout.pipelines), 0)
def test_live_reconfiguration_del_tenant(self):
# Test tenant deletion while changes are enqueued
self.executor_server.hold_jobs_in_build = True
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
B = self.fake_gerrit.addFakeChange('org/project1', 'master', 'B')
C = self.fake_gerrit.addFakeChange('org/project1', 'master', 'C')
self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
self.fake_gerrit.addEvent(C.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.executor_server.release('.*-merge')
self.waitUntilSettled()
self.assertEqual(len(self.builds), 8)
self.newTenantConfig('config/single-tenant/main-no-tenants.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
self.waitUntilSettled()
self.assertEqual(A.data['status'], 'NEW')
self.assertEqual(B.data['status'], 'NEW')
self.assertEqual(C.data['status'], 'NEW')
self.assertEqual(A.reported, 0)
self.assertEqual(B.reported, 0)
self.assertEqual(C.reported, 0)
self.assertHistory([
dict(name='project-merge', result='SUCCESS', changes='2,1'),
dict(name='project-merge', result='SUCCESS', changes='1,1'),
dict(name='project-merge', result='SUCCESS', changes='3,1'),
dict(name='project-test1', result='ABORTED', changes='2,1'),
dict(name='project-test2', result='ABORTED', changes='2,1'),
dict(name='project1-project2-integration',
result='ABORTED', changes='2,1'),
dict(name='project-test1', result='ABORTED', changes='3,1'),
dict(name='project-test2', result='ABORTED', changes='3,1'),
dict(name='project1-project2-integration',
result='ABORTED', changes='3,1'),
dict(name='project-test1', result='ABORTED', changes='1,1'),
dict(name='project-test2', result='ABORTED', changes='1,1'),
], ordered=False)
tenant = self.scheds.first.sched.abide.tenants.get('tenant-one')
self.assertIsNone(tenant)
@simple_layout("layouts/reconfigure-failed-head.yaml")
def test_live_reconfiguration_failed_change_at_head(self):
# Test that if we reconfigure with a failed change at head,
@ -5427,41 +5208,6 @@ class TestScheduler(ZuulTestCase):
dict(name='job2', result='SUCCESS', changes='1,1 2,1'),
], ordered=False)
@simple_layout('layouts/reconfigure-remove-add.yaml')
def test_reconfigure_remove_add(self):
# Test removing, then adding a job while in queue
self.executor_server.hold_jobs_in_build = True
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
A.addApproval('Code-Review', 2)
self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
self.waitUntilSettled()
self.assertTrue(len(self.builds), 2)
self.executor_server.release('job2')
self.assertTrue(len(self.builds), 1)
# Remove job2
self.commitConfigUpdate('org/common-config',
'layouts/reconfigure-remove-add2.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.assertTrue(len(self.builds), 1)
# Add job2 back
self.commitConfigUpdate('org/common-config',
'layouts/reconfigure-remove-add.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.assertTrue(len(self.builds), 2)
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
# This will run new builds for B
self.waitUntilSettled()
self.assertHistory([
dict(name='job2', result='SUCCESS', changes='1,1'),
dict(name='job1', result='SUCCESS', changes='1,1'),
dict(name='job2', result='SUCCESS', changes='1,1'),
], ordered=False)
def test_worker_update_metadata(self):
"Test if a worker can send back metadata about itself"
self.executor_server.hold_jobs_in_build = True
@ -6269,27 +6015,27 @@ For CI problems and help debugging, contact ci@example.org"""
self.fake_nodepool.unpause()
self.waitUntilSettled()
def test_nodepool_job_removal(self):
"Test that nodes are returned unused after job removal"
def test_nodepool_project_removal(self):
"Test that nodes are returned unused after project removal"
self.fake_nodepool.pause()
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
A.addApproval('Code-Review', 2)
self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
A = self.fake_gerrit.addFakeChange('org/project1', 'master', 'A')
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
self.commitConfigUpdate('common-config', 'layouts/no-jobs.yaml')
self.newTenantConfig('config/single-tenant/main-one-project.yaml')
# This layout defines only org/project, not org/project1
self.commitConfigUpdate(
'common-config',
'layouts/live-reconfiguration-del-project.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
self.fake_nodepool.unpause()
self.waitUntilSettled()
self.assertEqual(A.data['status'], 'MERGED')
self.assertEqual(A.reported, 2)
self.assertHistory([
dict(name='gate-noop', result='SUCCESS', changes='1,1'),
])
self.assertEqual(A.data['status'], 'NEW')
self.assertEqual(A.reported, 0)
for node in self.fake_nodepool.getNodes():
self.assertFalse(node['_lock'])
self.assertEqual(node['state'], 'ready')
@ -8011,6 +7757,7 @@ class TestSemaphore(ZuulTestCase):
len(tenant.semaphore_handler.semaphoreHolders("test-semaphore")),
1)
# remove the pipeline
self.commitConfigUpdate(
'common-config',
'config/semaphore/zuul-reconfiguration.yaml')
@ -8029,102 +7776,6 @@ class TestSemaphore(ZuulTestCase):
len(tenant.semaphore_handler.semaphoreHolders("test-semaphore")),
0)
def test_semaphore_reconfigure_job_removal(self):
    """Check semaphore holders across a reconfiguration that removes a job.

    One holder of "test-semaphore" is expected while the change is
    running, and none once the 'zuul-remove-job' config is loaded and
    project-test1 has been released.
    """
    def lookup_tenant():
        return self.scheds.first.sched.abide.tenants.get('tenant-one')

    def holder_count(current):
        holders = current.semaphore_handler.semaphoreHolders(
            "test-semaphore")
        return len(holders)

    self.executor_server.hold_jobs_in_build = True
    tenant = lookup_tenant()
    change = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
    self.assertEqual(holder_count(tenant), 0)

    self.fake_gerrit.addEvent(change.getPatchsetCreatedEvent(1))
    self.waitUntilSettled()
    self.assertEqual(holder_count(tenant), 1)

    # Load the 'zuul-remove-job' variant of the config and reconfigure.
    self.commitConfigUpdate(
        'common-config',
        'config/semaphore/git/common-config/zuul-remove-job.yaml')
    self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
    self.waitUntilSettled()

    # project-test1 should be the only job left; let it finish.
    self.executor_server.release('project-test1')
    self.waitUntilSettled()

    # Look the tenant up again before inspecting the check pipeline,
    # which should now be empty.
    tenant = lookup_tenant()
    remaining = tenant.layout.pipelines['check'].getAllItems()
    self.assertEqual(len(remaining), 0)

    # Nothing should be holding the semaphore any more.
    self.assertEqual(holder_count(tenant), 0)

    self.executor_server.hold_jobs_in_build = False
    self.executor_server.release()
    self.waitUntilSettled()
def test_semaphore_reconfigure_job_removal_pending_node_request(self):
    """
    Check semaphore holders across a reconfiguration that removes a job
    while nodepool is paused, i.e. while the node request is pending.
    """
    def lookup_tenant():
        return self.scheds.first.sched.abide.tenants.get('tenant-one')

    def holder_count(current):
        holders = current.semaphore_handler.semaphoreHolders(
            "test-semaphore")
        return len(holders)

    self.executor_server.hold_jobs_in_build = True

    # With nodepool paused the job stays in the node request state
    # while the reconfiguration happens.
    self.fake_nodepool.pause()
    tenant = lookup_tenant()
    change = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
    self.assertEqual(holder_count(tenant), 0)

    self.fake_gerrit.addEvent(change.getPatchsetCreatedEvent(1))
    self.waitUntilSettled()
    self.assertEqual(holder_count(tenant), 1)

    # Load the 'zuul-remove-job' variant of the config and reconfigure.
    self.commitConfigUpdate(
        'common-config',
        'config/semaphore/git/common-config/zuul-remove-job.yaml')
    self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
    self.waitUntilSettled()

    # Nodepool may continue now.
    self.fake_nodepool.unpause()
    self.waitUntilSettled()

    # project-test1 should be the only job left; let it finish.
    self.executor_server.release('project-test1')
    self.waitUntilSettled()

    # Look the tenant up again before inspecting the check pipeline,
    # which should now be empty.
    tenant = lookup_tenant()
    remaining = tenant.layout.pipelines['check'].getAllItems()
    self.assertEqual(len(remaining), 0)

    # Nothing should be holding the semaphore any more.
    self.assertEqual(holder_count(tenant), 0)

    self.executor_server.hold_jobs_in_build = False
    self.executor_server.release()
    self.waitUntilSettled()
def test_semaphore_handler_cleanup(self):
"Test the semaphore handler leak cleanup"
self.executor_server.hold_jobs_in_build = True
@ -8654,56 +8305,6 @@ class TestSchedulerFailFast(ZuulTestCase):
dict(name='project-test6', result='FAILURE', changes='1,1'),
], ordered=False)
def test_fail_fast_reconfigure(self):
    """
    Exercise a fail-fast pipeline across a reconfiguration that loads
    the 'fail-fast-reconfigure' layout while builds are held.

    The expected history has project-test1 ABORTED and every other job
    SUCCESS.
    """
    def entry(job_name, outcome='SUCCESS'):
        return dict(name=job_name, result=outcome, changes='1,1')

    executor = self.executor_server
    executor.hold_jobs_in_build = True
    self.fake_nodepool.pause()

    change = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
    executor.failJob('project-test1', change)
    self.fake_gerrit.addEvent(change.getPatchsetCreatedEvent(1))
    # The original settles twice here; keep both calls.
    self.waitUntilSettled()
    self.waitUntilSettled()

    self.assertEqual(len(self.builds), 1)
    self.assertEqual(self.builds[0].name, 'project-merge')
    executor.release('project-merge')
    self.waitUntilSettled()

    # project-test1, project-test2 and project-test6 should be
    # running at this point.
    self.assertEqual(len(self.builds), 3)

    # Commit the new layout, which drops project-test1.
    self.commitConfigUpdate('common-config',
                            'layouts/fail-fast-reconfigure.yaml')
    self.scheds.execute(lambda app: app.sched.reconfigure(app.config))

    # Let project-test1 go.
    executor.release('project-test1')
    self.waitUntilSettled()

    self.fake_nodepool.unpause()
    executor.hold_jobs_in_build = False
    executor.release()
    self.waitUntilSettled()

    self.assertEqual(len(self.builds), 0)
    self.assertEqual(change.reported, 1)
    self.assertEqual(change.patchsets[0]['approvals'][0]['value'], "1")
    expected_history = [
        entry('project-merge'),
        entry('project-test1', 'ABORTED'),
        entry('project-test2'),
        entry('project-test3'),
        entry('project-test4'),
        entry('project-test5'),
        entry('project-test6'),
    ]
    self.assertHistory(expected_history, ordered=False)
def test_fail_fast_retry(self):
"""
Tests that a retried build doesn't trigger fail-fast.

View File

@ -20,6 +20,7 @@ import os
import sys
import textwrap
import gc
from time import sleep
from unittest import skip, skipIf
import paramiko
@ -2440,11 +2441,15 @@ class TestGlobalRepoState(AnsibleZuulTestCase):
def test_inherited_playbooks(self):
# Test that the repo state is restored globally for the whole buildset
# including inherited projects not in the dependency chain.
self.executor_server.hold_jobs_in_build = True
self.executor_server.hold_jobs_in_start = True
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
A.addApproval('Approved', 1)
self.fake_gerrit.addEvent(A.addApproval('Code-Review', 2))
self.waitUntilSettled()
for _ in iterate_timeout(30, 'Wait for build to be in starting phase'):
if self.executor_server.job_workers:
sleep(1)
break
# The build test1 is running while test2 is waiting for test1.
self.assertEqual(len(self.builds), 1)
@ -2467,8 +2472,12 @@ class TestGlobalRepoState(AnsibleZuulTestCase):
self.log.info('Merge test change on common-config')
B.setMerged()
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
# Reset repo to ensure the cached repo has the failing commit. This
# is needed to ensure that the repo state has been restored.
repo = self.executor_server.merger.getRepo('gerrit', 'common-config')
repo.reset()
self.executor_server.hold_jobs_in_start = False
self.waitUntilSettled()
self.assertHistory([
dict(name='test1', result='SUCCESS', changes='1,1'),
@ -2478,12 +2487,16 @@ class TestGlobalRepoState(AnsibleZuulTestCase):
def test_required_projects(self):
# Test that the repo state is restored globally for the whole buildset
# including required projects not in the dependency chain.
self.executor_server.hold_jobs_in_build = True
self.executor_server.hold_jobs_in_start = True
A = self.fake_gerrit.addFakeChange('org/requiringproject', 'master',
'A')
A.addApproval('Approved', 1)
self.fake_gerrit.addEvent(A.addApproval('Code-Review', 2))
self.waitUntilSettled()
for _ in iterate_timeout(30, 'Wait for build to be in starting phase'):
if self.executor_server.job_workers:
sleep(1)
break
# The build require-test1 is running,
# require-test2 is waiting for require-test1.
@ -2505,8 +2518,13 @@ class TestGlobalRepoState(AnsibleZuulTestCase):
self.log.info('Merge test change on common-config')
B.setMerged()
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
# Reset repo to ensure the cached repo has the failing commit. This
# is needed to ensure that the repo state has been restored.
repo = self.executor_server.merger.getRepo(
'gerrit', 'org/requiredproject')
repo.reset()
self.executor_server.hold_jobs_in_start = False
self.waitUntilSettled()
self.assertHistory([
dict(name='require-test1', result='SUCCESS', changes='1,1'),
@ -2516,7 +2534,7 @@ class TestGlobalRepoState(AnsibleZuulTestCase):
def test_dependent_project(self):
# Test that the repo state is restored globally for the whole buildset
# including dependent projects.
self.executor_server.hold_jobs_in_build = True
self.executor_server.hold_jobs_in_start = True
B = self.fake_gerrit.addFakeChange('org/requiredproject', 'master',
'B')
A = self.fake_gerrit.addFakeChange('org/dependentproject', 'master',
@ -2524,7 +2542,11 @@ class TestGlobalRepoState(AnsibleZuulTestCase):
A.setDependsOn(B, 1)
A.addApproval('Approved', 1)
self.fake_gerrit.addEvent(A.addApproval('Code-Review', 2))
self.waitUntilSettled()
for _ in iterate_timeout(30, 'Wait for build to be in starting phase'):
if self.executor_server.job_workers:
sleep(1)
break
# The build dependent-test1 is running,
# dependent-test2 is waiting for dependent-test1.
@ -2546,8 +2568,13 @@ class TestGlobalRepoState(AnsibleZuulTestCase):
self.log.info('Merge test change on common-config')
C.setMerged()
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
# Reset repo to ensure the cached repo has the failing commit. This
# is needed to ensure that the repo state has been restored.
repo = self.executor_server.merger.getRepo(
'gerrit', 'org/requiredproject')
repo.reset()
self.executor_server.hold_jobs_in_start = False
self.waitUntilSettled()
self.assertHistory([
dict(name='dependent-test1', result='SUCCESS', changes='1,1 2,1'),
@ -5922,9 +5949,6 @@ class TestJobPause(AnsibleZuulTestCase):
def test_job_reconfigure_resume(self):
"""
Tests that a paused job is resumed after reconfiguration
Tests that a paused job is resumed after a reconfiguration removed the
last job which is in progress.
"""
self.wait_timeout = 120
@ -5942,25 +5966,16 @@ class TestJobPause(AnsibleZuulTestCase):
self.assertEqual(len(self.builds), 2, 'compile and test in progress')
# Remove the test1 job.
self.commitConfigUpdate(
'org/project6',
'config/job-pause/git/org_project6/zuul-reconfigure.yaml')
self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
self.waitUntilSettled()
# The "compile" job might be paused during the waitUntilSettled
# call and appear settled; it should automatically resume
# though, so just wait for it.
for x in iterate_timeout(60, 'job compile finished'):
if not self.builds:
break
self.executor_server.release('test')
self.waitUntilSettled()
self.assertHistory([
dict(name='compile', result='SUCCESS', changes='1,1'),
dict(name='test', result='ABORTED', changes='1,1'),
])
dict(name='test', result='SUCCESS', changes='1,1'),
], ordered=False)
def test_job_pause_skipped_child(self):
"""

View File

@ -1083,8 +1083,11 @@ class AnsibleJob(object):
for project in args['projects']:
self.log.debug("Cloning %s/%s" % (project['connection'],
project['name'],))
repo = merger.getRepo(project['connection'],
project['name'])
repo = merger.getRepo(
project['connection'],
project['name'],
repo_state=repo_state,
process_worker=self.executor_server.process_worker)
repos[project['canonical_name']] = repo
# The commit ID of the original item (before merging). Used
@ -1737,7 +1740,7 @@ class AnsibleJob(object):
if not jobdir_playbook.trusted:
path = self.checkoutUntrustedProject(project, branch, args)
else:
path = self.checkoutTrustedProject(project, branch)
path = self.checkoutTrustedProject(project, branch, args)
path = os.path.join(path, playbook['path'])
jobdir_playbook.path = self.findPlaybook(
@ -1760,7 +1763,7 @@ class AnsibleJob(object):
self.writeAnsibleConfig(jobdir_playbook)
def checkoutTrustedProject(self, project, branch):
def checkoutTrustedProject(self, project, branch, args):
root = self.jobdir.getTrustedProject(project.canonical_name,
branch)
if not root:
@ -1772,8 +1775,12 @@ class AnsibleJob(object):
root,
self.executor_server.merge_root,
self.log)
merger.checkoutBranch(project.connection_name, project.name,
branch)
merger.checkoutBranch(
project.connection_name, project.name,
branch,
repo_state=args['repo_state'],
process_worker=self.executor_server.process_worker,
zuul_event_id=self.zuul_event_id)
else:
self.log.debug("Using existing repo %s@%s in trusted space %s",
project, branch, root)
@ -1804,16 +1811,25 @@ class AnsibleJob(object):
self.log)
break
repo_state = None
if merger is None:
merger = self.executor_server._getMerger(
root,
self.executor_server.merge_root,
self.log)
# If we don't have this repo yet prepared we need to restore
# the repo state. Otherwise we have speculative merges in the
# repo and must not restore the repo state again.
repo_state = args['repo_state']
self.log.debug("Cloning %s@%s into new untrusted space %s",
project, branch, root)
merger.checkoutBranch(project.connection_name, project.name,
branch)
merger.checkoutBranch(
project.connection_name, project.name,
branch, repo_state=repo_state,
process_worker=self.executor_server.process_worker,
zuul_event_id=self.zuul_event_id)
else:
self.log.debug("Using existing repo %s@%s in trusted space %s",
project, branch, root)
@ -1889,7 +1905,7 @@ class AnsibleJob(object):
if not jobdir_playbook.trusted:
path = self.checkoutUntrustedProject(project, branch, args)
else:
path = self.checkoutTrustedProject(project, branch)
path = self.checkoutTrustedProject(project, branch, args)
# The name of the symlink is the requested name of the role
# (which may be the repo name or may be something else; this

View File

@ -288,15 +288,19 @@ class PipelineManager(metaclass=ABCMeta):
# failing.
if item_ahead_valid:
change_queue.moveItem(item, old_item_ahead)
# Get an updated copy of the layout and update the job
# graph if necessary. This resumes the buildset merge
# Get an updated copy of the layout, but if we have a
# job graph already, then keep it (our repo state and
# jobs are frozen and will now only update if the item
# ahead changes). This resumes the buildset merge
# state machine. If we have an up-to-date layout, it
# will go ahead and refresh the job graph if needed;
# or it will send a new merge job if necessary, or it
# will do nothing if we're waiting on a merge job.
# will go ahead and refresh the job graph if there
# isn't one; or it will send a new merge job if
# necessary, or it will do nothing if we're waiting on
# a merge job.
has_job_graph = bool(item.job_graph)
item.job_graph = None
item.layout = None
# If the item is no longer active, but has a job graph we
# will make sure to update it.
if item.active or has_job_graph:

View File

@ -760,7 +760,8 @@ class Merger(object):
# behavior e.g. to keep the 'origin' remote intact.
self.execution_context = execution_context
def _addProject(self, hostname, project_name, url, sshkey, zuul_event_id):
def _addProject(self, hostname, connection_name, project_name, url, sshkey,
repo_state, zuul_event_id, process_worker=None):
repo = None
key = '/'.join([hostname, project_name])
try:
@ -776,6 +777,11 @@ class Merger(object):
logger=self.logger, git_timeout=self.git_timeout,
zuul_event_id=zuul_event_id)
# If we got a repo state restore it
if repo_state:
self._restoreRepoState(
connection_name, project_name, repo, repo_state,
zuul_event_id, process_worker=process_worker)
self.repos[key] = repo
except Exception:
log = get_annotated_logger(self.log, zuul_event_id)
@ -783,7 +789,8 @@ class Merger(object):
hostname, project_name)
return repo
def getRepo(self, connection_name, project_name, zuul_event_id=None):
def getRepo(self, connection_name, project_name,
repo_state=None, zuul_event_id=None, process_worker=None):
source = self.connections.getSource(connection_name)
project = source.getProject(project_name)
hostname = project.canonical_hostname
@ -799,8 +806,9 @@ class Merger(object):
raise Exception("Unable to set up repo for project %s/%s"
" without a url" %
(connection_name, project_name,))
return self._addProject(hostname, project_name, url, sshkey,
zuul_event_id)
return self._addProject(hostname, connection_name, project_name, url,
sshkey, repo_state, zuul_event_id,
process_worker=process_worker)
def updateRepo(self, connection_name, project_name, repo_state=None,
zuul_event_id=None,
@ -832,11 +840,14 @@ class Merger(object):
raise
def checkoutBranch(self, connection_name, project_name, branch,
zuul_event_id=None):
repo_state=None, zuul_event_id=None,
process_worker=None):
log = get_annotated_logger(self.log, zuul_event_id)
log.info("Checking out %s/%s branch %s",
connection_name, project_name, branch)
repo = self.getRepo(connection_name, project_name,
repo_state=repo_state,
process_worker=process_worker,
zuul_event_id=zuul_event_id)
repo.checkout(branch, zuul_event_id=zuul_event_id)

View File

@ -725,6 +725,10 @@ class Scheduler(threading.Thread):
if event.validate_tenants is None:
for tenant in abide.tenants.values():
self._reconfigureTenant(tenant)
for old_tenant in self.abide.tenants.values():
if not abide.tenants.get(old_tenant.name):
# We deleted a tenant
self._reconfigureDeleteTenant(old_tenant)
self.abide = abide
else:
loading_errors = []
@ -791,6 +795,8 @@ class Scheduler(threading.Thread):
tenant = abide.tenants.get(tenant_name)
if tenant is not None:
self._reconfigureTenant(tenant)
else:
self._reconfigureDeleteTenant(old_tenant)
self.abide = abide
duration = round(time.monotonic() - start, 3)
self.log.info("Smart reconfiguration of tenants %s complete "
@ -836,6 +842,10 @@ class Scheduler(threading.Thread):
(trusted, new_project) = tenant.getProject(project.canonical_name)
if new_project:
return new_project
if item.live:
return None
# If this is a non-live item we may be looking at a
# "foreign" project, ie, one which is not defined in the
# config but is constructed ad-hoc to satisfy a
@ -860,6 +870,7 @@ class Scheduler(threading.Thread):
source = child_project.source
new_project = source.getProject(project.name)
return new_project
return None
def _reenqueueTenant(self, old_tenant, tenant):
@ -890,8 +901,8 @@ class Scheduler(threading.Thread):
items_to_remove = []
builds_to_cancel = []
requests_to_cancel = []
last_head = None
for shared_queue in old_pipeline.queues:
last_head = None
# Attempt to keep window sizes from shrinking where possible
project, branch = shared_queue.project_branches[0]
new_queue = new_pipeline.getQueue(project, branch)
@ -899,14 +910,10 @@ class Scheduler(threading.Thread):
new_queue.window = max(shared_queue.window,
new_queue.window_floor)
for item in shared_queue.queue:
if not item.item_ahead:
last_head = item
item.pipeline = None
item.queue = None
item.change.project = self._reenqueueGetProject(
tenant, item)
# If the old item ahead made it in, re-enqueue
# this one behind it.
new_project = self._reenqueueGetProject(
tenant, item)
if item.item_ahead in items_to_remove:
old_item_ahead = None
item_ahead_valid = False
@ -916,7 +923,12 @@ class Scheduler(threading.Thread):
item.item_ahead = None
item.items_behind = []
reenqueued = False
if item.change.project:
if new_project:
item.change.project = new_project
item.pipeline = None
item.queue = None
if not old_item_ahead or not last_head:
last_head = item
try:
reenqueued = new_pipeline.manager.reEnqueueItem(
item, last_head, old_item_ahead,
@ -960,6 +972,10 @@ class Scheduler(threading.Thread):
"Canceling node request %s during reconfiguration",
request)
self.cancelJob(build_set, request.job)
for name, old_pipeline in old_tenant.layout.pipelines.items():
new_pipeline = tenant.layout.pipelines.get(name)
if not new_pipeline:
self._reconfigureDeletePipeline(old_pipeline)
def _reconfigureTenant(self, tenant):
# This is called from _doReconfigureEvent while holding the
@ -993,6 +1009,41 @@ class Scheduler(threading.Thread):
self.log.exception("Exception reporting initial "
"pipeline stats:")
def _reconfigureDeleteTenant(self, tenant):
    """Tear down every pipeline of a tenant removed by reconfiguration.

    Logs the removal and hands each pipeline in the tenant's layout to
    ``self._reconfigureDeletePipeline``.

    :arg tenant: the tenant that is absent from the new configuration;
        ``tenant.layout.pipelines`` is read.
    """
    # Lazy %-args, matching the other logging calls in this class.
    self.log.info("Removing tenant %s during reconfiguration", tenant)
    for pipeline in tenant.layout.pipelines.values():
        self._reconfigureDeletePipeline(pipeline)
def _reconfigureDeletePipeline(self, pipeline):
    """Cancel all outstanding work on a pipeline that is being removed.

    For every queue of the pipeline, each item is unlinked from its
    neighbours (``item_ahead`` / ``items_behind`` are cleared) and all
    builds and node requests of its current build set are passed to
    ``self.cancelJob``.

    :arg pipeline: the pipeline being removed; ``pipeline.queues`` is
        walked.
    """
    self.log.info("Removing pipeline %s during reconfiguration", pipeline)
    for shared_queue in pipeline.queues:
        # Gather first, cancel afterwards.
        # NOTE(review): presumably this avoids iterating a build set
        # while cancelJob modifies it -- confirm against cancelJob.
        builds_to_cancel = []
        requests_to_cancel = []
        for item in shared_queue.queue:
            item.item_ahead = None
            item.items_behind = []
            self.log.info("Removing item %s during reconfiguration", item)
            build_set = item.current_build_set
            builds_to_cancel.extend(build_set.getBuilds())
            # Only the request objects are needed, not the dict keys.
            requests_to_cancel.extend(
                (build_set, request)
                for request in build_set.node_requests.values())

        # Within a queue, builds are cancelled before node requests.
        for build in builds_to_cancel:
            self.log.info(
                "Canceling build %s during reconfiguration", build)
            self.cancelJob(build.build_set, build.job, build=build)
        for build_set, request in requests_to_cancel:
            self.log.info(
                "Canceling node request %s during reconfiguration",
                request)
            self.cancelJob(build_set, request.job)
def _doPromoteEvent(self, event):
tenant = self.abide.tenants.get(event.tenant_name)
pipeline = tenant.layout.pipelines[event.pipeline_name]