Merge "executor: run cleanup playbook on stop"
This commit is contained in:
commit
b300d6520d
|
@ -1834,6 +1834,8 @@ class FakeBuild(object):
|
|||
|
||||
|
||||
class RecordingAnsibleJob(zuul.executor.server.AnsibleJob):
|
||||
result = None
|
||||
|
||||
def doMergeChanges(self, merger, items, repo_state):
|
||||
# Get a merger in order to update the repos involved in this job.
|
||||
commit = super(RecordingAnsibleJob, self).doMergeChanges(
|
||||
|
@ -1843,8 +1845,12 @@ class RecordingAnsibleJob(zuul.executor.server.AnsibleJob):
|
|||
return commit
|
||||
|
||||
def recordResult(self, result):
|
||||
build = self.executor_server.job_builds[self.job.unique]
|
||||
self.executor_server.lock.acquire()
|
||||
build = self.executor_server.job_builds.get(self.job.unique)
|
||||
if not build:
|
||||
self.executor_server.lock.release()
|
||||
# Already recorded
|
||||
return
|
||||
self.executor_server.build_history.append(
|
||||
BuildHistory(name=build.name, result=result, changes=build.changes,
|
||||
node=build.node, uuid=build.unique,
|
||||
|
@ -1861,12 +1867,19 @@ class RecordingAnsibleJob(zuul.executor.server.AnsibleJob):
|
|||
build = self.executor_server.job_builds[self.job.unique]
|
||||
build.jobdir = self.jobdir
|
||||
|
||||
result = super(RecordingAnsibleJob, self).runPlaybooks(args)
|
||||
self.recordResult(result)
|
||||
return result
|
||||
self.result = super(RecordingAnsibleJob, self).runPlaybooks(args)
|
||||
if self.result is None:
|
||||
# Record result now because cleanup won't be performed
|
||||
self.recordResult(None)
|
||||
return self.result
|
||||
|
||||
def runCleanupPlaybooks(self):
    """Run the parent class's cleanup playbooks, then record the result.

    The value stored on ``self.result`` is passed to ``recordResult`` only
    after the parent implementation has returned. When ``self.result`` is
    ``None`` nothing is recorded here.
    """
    super(RecordingAnsibleJob, self).runCleanupPlaybooks()
    if self.result is None:
        return
    self.recordResult(self.result)
|
||||
|
||||
def runAnsible(self, cmd, timeout, playbook, ansible_version,
|
||||
wrapped=True):
|
||||
wrapped=True, cleanup=False):
|
||||
build = self.executor_server.job_builds[self.job.unique]
|
||||
|
||||
if self.executor_server._run_ansible:
|
||||
|
@ -1876,7 +1889,7 @@ class RecordingAnsibleJob(zuul.executor.server.AnsibleJob):
|
|||
build.run()
|
||||
|
||||
result = super(RecordingAnsibleJob, self).runAnsible(
|
||||
cmd, timeout, playbook, ansible_version, wrapped)
|
||||
cmd, timeout, playbook, ansible_version, wrapped, cleanup)
|
||||
else:
|
||||
if playbook.path:
|
||||
result = build.run()
|
||||
|
|
5
tests/fixtures/config/cleanup-playbook/git/common-config/playbooks/cleanup.yaml
vendored
Normal file
5
tests/fixtures/config/cleanup-playbook/git/common-config/playbooks/cleanup.yaml
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
- hosts: all
|
||||
tasks:
|
||||
- file:
|
||||
path: "{{zuul._test.test_root}}/{{zuul.build}}.cleanup.flag"
|
||||
state: touch
|
3
tests/fixtures/config/cleanup-playbook/git/common-config/playbooks/failure.yaml
vendored
Normal file
3
tests/fixtures/config/cleanup-playbook/git/common-config/playbooks/failure.yaml
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
- hosts: all
|
||||
tasks:
|
||||
- fail: msg="This is a failure"
|
13
tests/fixtures/config/cleanup-playbook/git/common-config/playbooks/post.yaml
vendored
Normal file
13
tests/fixtures/config/cleanup-playbook/git/common-config/playbooks/post.yaml
vendored
Normal file
|
@ -0,0 +1,13 @@
|
|||
- hosts: all
|
||||
tasks:
|
||||
- debug: var=waitpath
|
||||
- file:
|
||||
path: "{{zuul._test.test_root}}/{{zuul.build}}.post_start.flag"
|
||||
state: touch
|
||||
# Do not finish until test creates the flag file
|
||||
- wait_for:
|
||||
state: present
|
||||
path: "{{waitpath}}"
|
||||
- file:
|
||||
path: "{{zuul._test.test_root}}/{{zuul.build}}.post_end.flag"
|
||||
state: touch
|
|
@ -0,0 +1,2 @@
|
|||
- hosts: all
|
||||
tasks: []
|
2
tests/fixtures/config/cleanup-playbook/git/common-config/playbooks/python27.yaml
vendored
Normal file
2
tests/fixtures/config/cleanup-playbook/git/common-config/playbooks/python27.yaml
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
- hosts: all
|
||||
tasks: []
|
|
@ -0,0 +1,31 @@
|
|||
- pipeline:
|
||||
name: check
|
||||
manager: independent
|
||||
post-review: true
|
||||
trigger:
|
||||
gerrit:
|
||||
- event: patchset-created
|
||||
success:
|
||||
gerrit:
|
||||
Verified: 1
|
||||
failure:
|
||||
gerrit:
|
||||
Verified: -1
|
||||
|
||||
- job:
|
||||
name: base
|
||||
parent: null
|
||||
|
||||
- job:
|
||||
name: python27
|
||||
pre-run: playbooks/pre.yaml
|
||||
post-run: playbooks/post.yaml
|
||||
cleanup-run: playbooks/cleanup.yaml
|
||||
vars:
|
||||
waitpath: '{{zuul._test.test_root}}/{{zuul.build}}/test_wait'
|
||||
run: playbooks/python27.yaml
|
||||
|
||||
- job:
|
||||
name: python27-failure
|
||||
cleanup-run: playbooks/cleanup.yaml
|
||||
run: playbooks/failure.yaml
|
|
@ -0,0 +1,5 @@
|
|||
- project:
|
||||
name: org/project
|
||||
check:
|
||||
jobs:
|
||||
- python27
|
|
@ -0,0 +1 @@
|
|||
test
|
|
@ -0,0 +1,8 @@
|
|||
- tenant:
|
||||
name: tenant-one
|
||||
source:
|
||||
gerrit:
|
||||
config-projects:
|
||||
- common-config
|
||||
untrusted-projects:
|
||||
- org/project
|
|
@ -2891,6 +2891,93 @@ class TestPostPlaybooks(AnsibleZuulTestCase):
|
|||
self.assertFalse(os.path.exists(post_end))
|
||||
|
||||
|
||||
class TestCleanupPlaybooks(AnsibleZuulTestCase):
    """Check that cleanup playbooks run on success, failure and abort."""

    tenant_config_file = 'config/cleanup-playbook/main.yaml'

    def _wait_for_build_start(self):
        """Poll (up to 60 seconds) until at least one build is listed."""
        for _ in iterate_timeout(60, 'job started'):
            if self.builds:
                break

    def _wait_for_post_start(self, flag_path):
        """Poll (up to 60 seconds) until *flag_path* exists on disk."""
        for _ in iterate_timeout(60, 'job post running'):
            if os.path.exists(flag_path):
                break

    def _flag_path(self, build, suffix):
        """Return the path of the flag file named ``<build uuid><suffix>``."""
        return os.path.join(self.test_root, build.uuid + suffix)

    def test_cleanup_playbook_success(self):
        # Test that the cleanup run is performed after a successful job
        self.executor_server.verbose = True
        change = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
        self.fake_gerrit.addEvent(change.getPatchsetCreatedEvent(1))

        self._wait_for_build_start()
        running = self.builds[0]

        post_start = self._flag_path(running, '.post_start.flag')
        self._wait_for_post_start(post_start)

        # Create the file the test expects the post playbook to wait for
        wait_file = os.path.join(self.test_root, running.uuid, 'test_wait')
        with open(wait_file, "w") as of:
            of.write("continue")
        self.waitUntilSettled()

        finished = self.getJobFromHistory('python27')
        self.assertEqual('SUCCESS', finished.result)
        cleanup_flag = self._flag_path(finished, '.cleanup.flag')
        self.assertTrue(os.path.exists(cleanup_flag))

    def test_cleanup_playbook_failure(self):
        # Test that the cleanup run is performed when the job fails
        self.executor_server.verbose = True

        in_repo_conf = textwrap.dedent(
            """
            - project:
                check:
                  jobs:
                    - python27-failure
            """)
        change = self.fake_gerrit.addFakeChange(
            'org/project', 'master', 'A',
            files={'.zuul.yaml': in_repo_conf})
        self.fake_gerrit.addEvent(change.getPatchsetCreatedEvent(1))
        self._wait_for_build_start()
        self.waitUntilSettled()

        finished = self.getJobFromHistory('python27-failure')
        self.assertEqual('FAILURE', finished.result)
        cleanup_flag = self._flag_path(finished, '.cleanup.flag')
        self.assertTrue(os.path.exists(cleanup_flag))

    def test_cleanup_playbook_abort(self):
        # Test that when we abort a job the cleanup run is performed
        self.executor_server.verbose = True
        change = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
        self.fake_gerrit.addEvent(change.getPatchsetCreatedEvent(1))

        self._wait_for_build_start()
        running = self.builds[0]

        post_start = self._flag_path(running, '.post_start.flag')
        self._wait_for_post_start(post_start)

        # The post playbook has started, abort the job
        self.fake_gerrit.addEvent(change.getChangeAbandonedEvent())
        self.waitUntilSettled()

        finished = self.getJobFromHistory('python27')
        self.assertEqual('ABORTED', finished.result)

        post_end = self._flag_path(finished, '.post_end.flag')
        cleanup_flag = self._flag_path(finished, '.cleanup.flag')
        self.assertTrue(os.path.exists(cleanup_flag))
        self.assertTrue(os.path.exists(post_start))
        self.assertFalse(os.path.exists(post_end))
|
||||
|
||||
|
||||
class TestBrokenTrustedConfig(ZuulTestCase):
|
||||
# Test we can deal with a broken config only with trusted projects. This
|
||||
# is different than TestBrokenConfig, as it does not have a missing
|
||||
|
|
|
@ -717,6 +717,9 @@ class AnsibleJob(object):
|
|||
self.paused = False
|
||||
self.aborted = False
|
||||
self.aborted_reason = None
|
||||
self.cleaned = False
|
||||
self.cleanup_lock = threading.Lock()
|
||||
self.cleanup_started = False
|
||||
self._resume_event = threading.Event()
|
||||
self.thread = None
|
||||
self.project_info = {}
|
||||
|
@ -997,6 +1000,10 @@ class AnsibleJob(object):
|
|||
|
||||
result = self.runPlaybooks(args)
|
||||
|
||||
if result is not None:
|
||||
# Only run cleanup when playbooks ran (i.e. result is not None)
|
||||
self.runCleanupPlaybooks()
|
||||
|
||||
# Stop the persistent SSH connections.
|
||||
setup_status, setup_code = self.runAnsibleCleanup(
|
||||
self.jobdir.setup_playbook)
|
||||
|
@ -1195,6 +1202,8 @@ class AnsibleJob(object):
|
|||
key, (time.monotonic() - self.time_starting_build) * 1000)
|
||||
|
||||
self.started = True
|
||||
# Record ansible version being used for the cleanup phase
|
||||
self.ansible_version = ansible_version
|
||||
time_started = time.time()
|
||||
# timeout value is "total" job timeout which accounts for
|
||||
# pre-run and run playbooks. post-run is different because
|
||||
|
@ -1288,6 +1297,29 @@ class AnsibleJob(object):
|
|||
|
||||
return result
|
||||
|
||||
def runCleanupPlaybooks(self):
    """Run the job's cleanup playbooks at most once.

    Returns immediately when the job directory lists no cleanup playbooks,
    or when an earlier call already completed the cleanup phase
    (``self.cleaned`` is set). Otherwise it marks the cleanup as started,
    appends a banner line to the job output file and runs every cleanup
    playbook in order through ``runAnsiblePlaybook`` with
    ``phase='cleanup'``.

    The banner is written only after the ``cleaned`` check, so a repeated
    call that performs no work does not log a cleanup run.
    """
    if not self.jobdir.cleanup_playbooks:
        return

    # TODO: make this configurable
    cleanup_timeout = 300

    with self.cleanup_lock:
        if self.cleaned:
            # The cleanup phase may already have run when multiple aborts
            # were issued; skip it without writing another banner.
            return
        self.cleanup_started = True

        with open(self.jobdir.job_output_file, 'a') as job_output:
            job_output.write("{now} | Running Ansible cleanup...\n".format(
                now=datetime.datetime.now()
            ))

        for index, playbook in enumerate(self.jobdir.cleanup_playbooks):
            self.runAnsiblePlaybook(
                playbook, cleanup_timeout, self.ansible_version,
                phase='cleanup', index=index)
        # Only reached when every playbook call returned normally.
        self.cleaned = True
|
||||
|
||||
def _logFinalPlaybookError(self):
|
||||
# Failures in the final post playbook can include failures
|
||||
# uploading logs, which makes diagnosing issues difficult.
|
||||
|
@ -1875,19 +1907,24 @@ class AnsibleJob(object):
|
|||
|
||||
def abortRunningProc(self):
|
||||
with self.proc_lock:
|
||||
if not self.proc:
|
||||
if self.proc and not self.cleanup_started:
|
||||
self.log.debug("Abort: sending kill signal to job "
|
||||
"process group")
|
||||
try:
|
||||
pgid = os.getpgid(self.proc.pid)
|
||||
os.killpg(pgid, signal.SIGKILL)
|
||||
except Exception:
|
||||
self.log.exception(
|
||||
"Exception while killing ansible process:")
|
||||
elif self.proc and self.cleanup_started:
|
||||
self.log.debug("Abort: cleanup is in progress")
|
||||
else:
|
||||
self.log.debug("Abort: no process is running")
|
||||
return
|
||||
self.log.debug("Abort: sending kill signal to job "
|
||||
"process group")
|
||||
try:
|
||||
pgid = os.getpgid(self.proc.pid)
|
||||
os.killpg(pgid, signal.SIGKILL)
|
||||
except Exception:
|
||||
self.log.exception("Exception while killing ansible process:")
|
||||
if self.started and not self.cleaned:
|
||||
self.runCleanupPlaybooks()
|
||||
|
||||
def runAnsible(self, cmd, timeout, playbook, ansible_version,
|
||||
wrapped=True):
|
||||
wrapped=True, cleanup=False):
|
||||
config_file = playbook.ansible_config
|
||||
env_copy = os.environ.copy()
|
||||
env_copy.update(self.ssh_agent.env)
|
||||
|
@ -1956,7 +1993,7 @@ class AnsibleJob(object):
|
|||
env_copy['HOME'] = self.jobdir.work_root
|
||||
|
||||
with self.proc_lock:
|
||||
if self.aborted:
|
||||
if self.aborted and not cleanup:
|
||||
return (self.RESULT_ABORTED, None)
|
||||
self.log.debug("Ansible command: ANSIBLE_CONFIG=%s ZUUL_JOBDIR=%s "
|
||||
"ZUUL_JOB_LOG_CONFIG=%s PYTHONPATH=%s TMP=%s %s",
|
||||
|
@ -2101,6 +2138,9 @@ class AnsibleJob(object):
|
|||
now=datetime.datetime.now(),
|
||||
line=line.decode('utf-8').rstrip()))
|
||||
|
||||
if self.aborted:
|
||||
return (self.RESULT_ABORTED, None)
|
||||
|
||||
return (self.RESULT_NORMAL, ret)
|
||||
|
||||
def runAnsibleSetup(self, playbook, ansible_version):
|
||||
|
@ -2220,7 +2260,8 @@ class AnsibleJob(object):
|
|||
|
||||
self.emitPlaybookBanner(playbook, 'START', phase)
|
||||
|
||||
result, code = self.runAnsible(cmd, timeout, playbook, ansible_version)
|
||||
result, code = self.runAnsible(cmd, timeout, playbook, ansible_version,
|
||||
cleanup=phase == 'cleanup')
|
||||
self.log.debug("Ansible complete, result %s code %s" % (
|
||||
self.RESULT_MAP[result], code))
|
||||
if self.executor_server.statsd:
|
||||
|
|
Loading…
Reference in New Issue