Merge "Fix canceling builds in starting phase"
This commit is contained in:
commit
83175b3fe0
|
@ -2210,6 +2210,12 @@ class RecordingAnsibleJob(zuul.executor.server.AnsibleJob):
|
|||
merger, items, repo_state)
|
||||
if not commit: # merge conflict
|
||||
self.recordResult('MERGER_FAILURE')
|
||||
|
||||
for _ in iterate_timeout(60, 'wait for merge'):
|
||||
if not self.executor_server.hold_jobs_in_start:
|
||||
break
|
||||
time.sleep(1)
|
||||
|
||||
return commit
|
||||
|
||||
def recordResult(self, result):
|
||||
|
@ -2284,6 +2290,10 @@ class RecordingAnsibleJob(zuul.executor.server.AnsibleJob):
|
|||
build.paused = False
|
||||
super().resume()
|
||||
|
||||
def _send_aborted(self):
    """Record an ABORTED result in the test history, then abort for real.

    Test-fixture override: recordResult() must run before the parent
    sends the ABORTED work-complete packet so assertHistory() in tests
    can observe builds that were cancelled while still starting.
    """
    self.recordResult('ABORTED')
    super()._send_aborted()
|
||||
|
||||
|
||||
class RecordingMergeClient(zuul.merger.client.MergeClient):
|
||||
|
||||
|
@ -2322,6 +2332,7 @@ class RecordingExecutorServer(zuul.executor.server.ExecutorServer):
|
|||
self._ansible_manager_class = FakeAnsibleManager
|
||||
super(RecordingExecutorServer, self).__init__(*args, **kw)
|
||||
self.hold_jobs_in_build = False
|
||||
self.hold_jobs_in_start = False
|
||||
self.lock = threading.Lock()
|
||||
self.running_builds = []
|
||||
self.build_history = []
|
||||
|
|
|
@ -2540,6 +2540,33 @@ class TestScheduler(ZuulTestCase):
|
|||
self.assertEqual(A.reported, 0, "Abandoned change should not report")
|
||||
self.assertEqual(B.reported, 1, "Change should report")
|
||||
|
||||
def test_cancel_starting_build(self):
    "Test that a canceled build that is not processed yet is removed"

    # Hold every job in the starting phase so the build exists on the
    # executor but has not begun running yet.
    self.executor_server.hold_jobs_in_start = True
    A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')

    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
    # Wait until the executor has picked the job up (a job worker
    # exists) but is still held in the starting phase.
    for _ in iterate_timeout(30, 'Wait for build to be in starting phase'):
        if self.executor_server.job_workers:
            break

    # Abandon change to cancel build
    self.fake_gerrit.addEvent(A.getChangeAbandonedEvent())

    # The cancellation is delivered as an executor:stop gearman job;
    # wait until that request has been observed by the server.
    for _ in iterate_timeout(30, 'Wait for executor:stop request'):
        stop_jobs = [x for x in self.gearman_server.jobs_history
                     if b'executor:stop' in x.name]
        if stop_jobs:
            break

    # Release the held job; the aborted build should now finalize.
    self.executor_server.hold_jobs_in_start = False
    self.waitUntilSettled()

    # The build never ran, so the only history entry is the abort.
    self.assertHistory([
        dict(name='project-merge', result='ABORTED')
    ])
|
||||
|
||||
def test_abandoned_not_timer(self):
|
||||
"Test that an abandoned change does not cancel timer jobs"
|
||||
# This test can not use simple_layout because it must start
|
||||
|
|
|
@ -389,8 +389,7 @@ class ExecutorClient(object):
|
|||
log.debug("Build has no associated gearman job")
|
||||
return False
|
||||
|
||||
# TODOv3(jeblair): make a nicer way of recording build start.
|
||||
if build.url is not None:
|
||||
if build.__gearman_worker is not None:
|
||||
log.debug("Build has already started")
|
||||
self.cancelRunningBuild(build)
|
||||
log.debug("Canceled running build")
|
||||
|
@ -406,12 +405,12 @@ class ExecutorClient(object):
|
|||
time.sleep(1)
|
||||
|
||||
log.debug("Still unable to find build to cancel")
|
||||
if build.url:
|
||||
if build.__gearman_worker is not None:
|
||||
log.debug("Build has just started")
|
||||
self.cancelRunningBuild(build)
|
||||
log.debug("Canceled running build")
|
||||
return True
|
||||
log.debug("Unable to cancel build")
|
||||
log.error("Unable to cancel build")
|
||||
|
||||
def onBuildCompleted(self, job, result=None):
|
||||
if job.unique in self.meta_jobs:
|
||||
|
@ -487,6 +486,7 @@ class ExecutorClient(object):
|
|||
build.url = data.get('url', build.url)
|
||||
# Update information about worker
|
||||
build.worker.updateFromData(data)
|
||||
build.__gearman_worker = build.worker.name
|
||||
|
||||
if 'paused' in data and build.paused != data['paused']:
|
||||
build.paused = data['paused']
|
||||
|
@ -496,7 +496,6 @@ class ExecutorClient(object):
|
|||
|
||||
if not started:
|
||||
self.log.info("Build %s started" % job)
|
||||
build.__gearman_worker = data.get('worker_name')
|
||||
self.sched.onBuildStarted(build)
|
||||
else:
|
||||
self.log.error("Unable to find build %s" % job.unique)
|
||||
|
|
|
@ -829,6 +829,10 @@ class AnsibleJob(object):
|
|||
def execute(self):
|
||||
try:
|
||||
self.time_starting_build = time.monotonic()
|
||||
|
||||
# report that job has been taken
|
||||
self.job.sendWorkData(json.dumps(self._base_job_data()))
|
||||
|
||||
self.ssh_agent.start()
|
||||
self.ssh_agent.add(self.private_key_file)
|
||||
for key in self.arguments.get('ssh_keys', []):
|
||||
|
@ -862,6 +866,22 @@ class AnsibleJob(object):
|
|||
except Exception:
|
||||
self.log.exception("Error finalizing job thread:")
|
||||
|
||||
def _base_job_data(self):
|
||||
return {
|
||||
# TODO(mordred) worker_name is needed as a unique name for the
|
||||
# client to use for cancelling jobs on an executor. It's
|
||||
# defaulting to the hostname for now, but in the future we
|
||||
# should allow setting a per-executor override so that one can
|
||||
# run more than one executor on a host.
|
||||
'worker_name': self.executor_server.hostname,
|
||||
'worker_hostname': self.executor_server.hostname,
|
||||
'worker_log_port': self.executor_server.log_streaming_port,
|
||||
}
|
||||
|
||||
def _send_aborted(self):
|
||||
result = dict(result='ABORTED')
|
||||
self.job.sendWorkComplete(json.dumps(result))
|
||||
|
||||
def _execute(self):
|
||||
args = self.arguments
|
||||
self.log.info(
|
||||
|
@ -914,6 +934,11 @@ class AnsibleJob(object):
|
|||
'branches': task.branches,
|
||||
}
|
||||
|
||||
# Early abort if abort requested
|
||||
if self.aborted:
|
||||
self._send_aborted()
|
||||
return
|
||||
|
||||
self.log.debug("Git updates complete")
|
||||
merger = self.executor_server._getMerger(
|
||||
self.jobdir.src_root,
|
||||
|
@ -939,10 +964,20 @@ class AnsibleJob(object):
|
|||
# a work complete result, don't run any jobs
|
||||
return
|
||||
|
||||
# Early abort if abort requested
|
||||
if self.aborted:
|
||||
self._send_aborted()
|
||||
return
|
||||
|
||||
state_items = [i for i in args['items'] if not i.get('number')]
|
||||
if state_items:
|
||||
merger.setRepoState(state_items, repo_state)
|
||||
|
||||
# Early abort if abort requested
|
||||
if self.aborted:
|
||||
self._send_aborted()
|
||||
return
|
||||
|
||||
for project in args['projects']:
|
||||
repo = repos[project['canonical_name']]
|
||||
# If this project is the Zuul project and this is a ref
|
||||
|
@ -979,30 +1014,31 @@ class AnsibleJob(object):
|
|||
for repo in repos.values():
|
||||
repo.setRemoteUrl('file:///dev/null')
|
||||
|
||||
# Early abort if abort requested
|
||||
if self.aborted:
|
||||
self._send_aborted()
|
||||
return
|
||||
|
||||
# This prepares each playbook and the roles needed for each.
|
||||
self.preparePlaybooks(args)
|
||||
|
||||
self.prepareAnsibleFiles(args)
|
||||
self.writeLoggingConfig()
|
||||
|
||||
data = {
|
||||
# TODO(mordred) worker_name is needed as a unique name for the
|
||||
# client to use for cancelling jobs on an executor. It's defaulting
|
||||
# to the hostname for now, but in the future we should allow
|
||||
# setting a per-executor override so that one can run more than
|
||||
# one executor on a host.
|
||||
'worker_name': self.executor_server.hostname,
|
||||
'worker_hostname': self.executor_server.hostname,
|
||||
'worker_log_port': self.executor_server.log_streaming_port
|
||||
}
|
||||
# Early abort if abort requested
|
||||
if self.aborted:
|
||||
self._send_aborted()
|
||||
return
|
||||
|
||||
data = self._base_job_data()
|
||||
if self.executor_server.log_streaming_port != DEFAULT_FINGER_PORT:
|
||||
data['url'] = "finger://{hostname}:{port}/{uuid}".format(
|
||||
hostname=data['worker_hostname'],
|
||||
port=data['worker_log_port'],
|
||||
hostname=self.executor_server.hostname,
|
||||
port=self.executor_server.log_streaming_port,
|
||||
uuid=self.job.unique)
|
||||
else:
|
||||
data['url'] = 'finger://{hostname}/{uuid}'.format(
|
||||
hostname=data['worker_hostname'],
|
||||
hostname=self.executor_server.hostname,
|
||||
uuid=self.job.unique)
|
||||
|
||||
self.job.sendWorkData(json.dumps(data))
|
||||
|
|
Loading…
Reference in New Issue