Merge "Fix rare semaphore leak during reconfiguration"
This commit is contained in:
commit
b12d0d6164
|
@ -0,0 +1,110 @@
|
||||||
|
- pipeline:
|
||||||
|
name: check
|
||||||
|
manager: independent
|
||||||
|
trigger:
|
||||||
|
gerrit:
|
||||||
|
- event: patchset-created
|
||||||
|
success:
|
||||||
|
gerrit:
|
||||||
|
Verified: 1
|
||||||
|
failure:
|
||||||
|
gerrit:
|
||||||
|
Verified: -1
|
||||||
|
|
||||||
|
- semaphore:
|
||||||
|
name: test-semaphore-two
|
||||||
|
max: 2
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: base
|
||||||
|
parent: null
|
||||||
|
nodeset:
|
||||||
|
nodes:
|
||||||
|
- name: controller
|
||||||
|
label: label1
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: project-test1
|
||||||
|
run: playbooks/project-test1.yaml
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: semaphore-one-test1
|
||||||
|
semaphore: test-semaphore
|
||||||
|
run: playbooks/semaphore-one-test1.yaml
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: semaphore-one-test2
|
||||||
|
semaphore: test-semaphore
|
||||||
|
run: playbooks/semaphore-one-test2.yaml
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: semaphore-two-test1
|
||||||
|
semaphore: test-semaphore-two
|
||||||
|
run: playbooks/semaphore-two-test1.yaml
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: semaphore-two-test2
|
||||||
|
semaphore: test-semaphore-two
|
||||||
|
run: playbooks/semaphore-two-test2.yaml
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: semaphore-one-test3
|
||||||
|
semaphore: test-semaphore
|
||||||
|
run: playbooks/semaphore-two-test1.yaml
|
||||||
|
nodeset:
|
||||||
|
nodes:
|
||||||
|
- name: controller
|
||||||
|
label: label1
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: semaphore-one-test1-resources-first
|
||||||
|
semaphore:
|
||||||
|
name: test-semaphore
|
||||||
|
resources-first: True
|
||||||
|
run: playbooks/semaphore-one-test1.yaml
|
||||||
|
|
||||||
|
- job:
|
||||||
|
name: semaphore-one-test2-resources-first
|
||||||
|
semaphore:
|
||||||
|
name: test-semaphore
|
||||||
|
resources-first: True
|
||||||
|
run: playbooks/semaphore-one-test1.yaml
|
||||||
|
|
||||||
|
- project:
|
||||||
|
name: org/project
|
||||||
|
check:
|
||||||
|
jobs:
|
||||||
|
- project-test1
|
||||||
|
# This is the difference to zuul.yaml that is used in
|
||||||
|
# test_semaphore_reconfigure_job_removal:
|
||||||
|
# - semaphore-one-test1
|
||||||
|
# - semaphore-one-test2
|
||||||
|
|
||||||
|
|
||||||
|
- project:
|
||||||
|
name: org/project1
|
||||||
|
check:
|
||||||
|
jobs:
|
||||||
|
- project-test1
|
||||||
|
- semaphore-two-test1
|
||||||
|
- semaphore-two-test2
|
||||||
|
|
||||||
|
- project:
|
||||||
|
name: org/project2
|
||||||
|
check:
|
||||||
|
jobs:
|
||||||
|
- semaphore-one-test3
|
||||||
|
|
||||||
|
- project:
|
||||||
|
name: org/project3
|
||||||
|
check:
|
||||||
|
jobs:
|
||||||
|
- project-test1
|
||||||
|
- semaphore-one-test1-resources-first
|
||||||
|
- semaphore-one-test2-resources-first
|
||||||
|
|
||||||
|
- project:
|
||||||
|
name: org/project4
|
||||||
|
check:
|
||||||
|
jobs:
|
||||||
|
- semaphore-one-test1-resources-first
|
|
@ -6584,6 +6584,96 @@ class TestSemaphore(ZuulTestCase):
|
||||||
self.assertFalse('test-semaphore' in
|
self.assertFalse('test-semaphore' in
|
||||||
tenant.semaphore_handler.semaphores)
|
tenant.semaphore_handler.semaphores)
|
||||||
|
|
||||||
|
def test_semaphore_reconfigure_job_removal(self):
|
||||||
|
"Test job removal during reconfiguration with semaphores"
|
||||||
|
self.executor_server.hold_jobs_in_build = True
|
||||||
|
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||||
|
|
||||||
|
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||||
|
self.assertFalse('test-semaphore' in
|
||||||
|
tenant.semaphore_handler.semaphores)
|
||||||
|
|
||||||
|
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
self.assertTrue('test-semaphore' in
|
||||||
|
tenant.semaphore_handler.semaphores)
|
||||||
|
|
||||||
|
self.commitConfigUpdate(
|
||||||
|
'common-config',
|
||||||
|
'config/semaphore/git/common-config/zuul-remove-job.yaml')
|
||||||
|
self.sched.reconfigure(self.config)
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# Release job project-test1 which should be the only job left
|
||||||
|
self.executor_server.release('project-test1')
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# The check pipeline should be empty
|
||||||
|
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||||
|
check_pipeline = tenant.layout.pipelines['check']
|
||||||
|
items = check_pipeline.getAllItems()
|
||||||
|
self.assertEqual(len(items), 0)
|
||||||
|
|
||||||
|
# The semaphore should be released
|
||||||
|
self.assertFalse('test-semaphore' in
|
||||||
|
tenant.semaphore_handler.semaphores)
|
||||||
|
|
||||||
|
self.executor_server.hold_jobs_in_build = False
|
||||||
|
self.executor_server.release()
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
def test_semaphore_reconfigure_job_removal_pending_node_request(self):
|
||||||
|
"""
|
||||||
|
Test job removal during reconfiguration with semaphores and pending
|
||||||
|
node request.
|
||||||
|
"""
|
||||||
|
self.executor_server.hold_jobs_in_build = True
|
||||||
|
|
||||||
|
# Pause nodepool so we can block the job in node request state during
|
||||||
|
# reconfiguration.
|
||||||
|
self.fake_nodepool.pause()
|
||||||
|
|
||||||
|
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||||
|
|
||||||
|
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||||
|
self.assertFalse('test-semaphore' in
|
||||||
|
tenant.semaphore_handler.semaphores)
|
||||||
|
|
||||||
|
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
self.assertTrue('test-semaphore' in
|
||||||
|
tenant.semaphore_handler.semaphores)
|
||||||
|
|
||||||
|
self.commitConfigUpdate(
|
||||||
|
'common-config',
|
||||||
|
'config/semaphore/git/common-config/zuul-remove-job.yaml')
|
||||||
|
self.sched.reconfigure(self.config)
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# Now we can unpause nodepool
|
||||||
|
self.fake_nodepool.unpause()
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# Release job project-test1 which should be the only job left
|
||||||
|
self.executor_server.release('project-test1')
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
# The check pipeline should be empty
|
||||||
|
tenant = self.sched.abide.tenants.get('tenant-one')
|
||||||
|
check_pipeline = tenant.layout.pipelines['check']
|
||||||
|
items = check_pipeline.getAllItems()
|
||||||
|
self.assertEqual(len(items), 0)
|
||||||
|
|
||||||
|
# The semaphore should be released
|
||||||
|
self.assertFalse('test-semaphore' in
|
||||||
|
tenant.semaphore_handler.semaphores)
|
||||||
|
|
||||||
|
self.executor_server.hold_jobs_in_build = False
|
||||||
|
self.executor_server.release()
|
||||||
|
self.waitUntilSettled()
|
||||||
|
|
||||||
|
|
||||||
class TestSemaphoreMultiTenant(ZuulTestCase):
|
class TestSemaphoreMultiTenant(ZuulTestCase):
|
||||||
tenant_config_file = 'config/multi-tenant-semaphore/main.yaml'
|
tenant_config_file = 'config/multi-tenant-semaphore/main.yaml'
|
||||||
|
|
|
@ -836,7 +836,7 @@ class PipelineManager(object):
|
||||||
self.log.debug("Item %s status is now:\n %s" %
|
self.log.debug("Item %s status is now:\n %s" %
|
||||||
(item, item.formatStatus()))
|
(item, item.formatStatus()))
|
||||||
|
|
||||||
if build.retry:
|
if build.retry and build.build_set.getJobNodeSet(build.job.name):
|
||||||
build.build_set.removeJobNodeSet(build.job.name)
|
build.build_set.removeJobNodeSet(build.job.name)
|
||||||
|
|
||||||
self._resumeBuilds(build.build_set)
|
self._resumeBuilds(build.build_set)
|
||||||
|
|
|
@ -830,6 +830,8 @@ class Scheduler(threading.Thread):
|
||||||
item.current_build_set.node_requests.items():
|
item.current_build_set.node_requests.items():
|
||||||
requests_to_cancel.append(
|
requests_to_cancel.append(
|
||||||
(item.current_build_set, request))
|
(item.current_build_set, request))
|
||||||
|
|
||||||
|
semaphores_to_release = []
|
||||||
for build in builds_to_cancel:
|
for build in builds_to_cancel:
|
||||||
self.log.info(
|
self.log.info(
|
||||||
"Canceling build %s during reconfiguration" % (build,))
|
"Canceling build %s during reconfiguration" % (build,))
|
||||||
|
@ -848,14 +850,16 @@ class Scheduler(threading.Thread):
|
||||||
self.log.exception(
|
self.log.exception(
|
||||||
"Exception while removing nodeset from build %s "
|
"Exception while removing nodeset from build %s "
|
||||||
"for change %s" % (build, build.build_set.item.change))
|
"for change %s" % (build, build.build_set.item.change))
|
||||||
tenant.semaphore_handler.release(
|
semaphores_to_release.append((build.build_set.item, build.job))
|
||||||
build.build_set.item, build.job)
|
|
||||||
for build_set, request in requests_to_cancel:
|
for build_set, request in requests_to_cancel:
|
||||||
self.log.info(
|
self.log.info(
|
||||||
"Canceling node request %s during reconfiguration",
|
"Canceling node request %s during reconfiguration",
|
||||||
request)
|
request)
|
||||||
self.nodepool.cancelRequest(request)
|
self.nodepool.cancelRequest(request)
|
||||||
build_set.removeJobNodeRequest(request.job.name)
|
build_set.removeJobNodeRequest(request.job.name)
|
||||||
|
semaphores_to_release.append((build_set.item, request.job))
|
||||||
|
for item, job in semaphores_to_release:
|
||||||
|
tenant.semaphore_handler.release(item, job)
|
||||||
|
|
||||||
def _reconfigureTenant(self, tenant):
|
def _reconfigureTenant(self, tenant):
|
||||||
# This is called from _doReconfigureEvent while holding the
|
# This is called from _doReconfigureEvent while holding the
|
||||||
|
|
Loading…
Reference in New Issue