Browse Source

Fix error reporting for special task failures

For some tasks, the Ansible log does not contain enough information to
debug failures (e.g. a missing role referenced by include_role).

Ansible does not treat those issues as an error (exit code 1) but as a
failed task, which leads to an exit code of 2.

Change-Id: Iea754814e3d55be6be1c2de7f2d45ceda757f480
changes/10/635110/3
Simon Westphahl 2 years ago
parent
commit
6e424878ae
4 changed files with 37 additions and 11 deletions
  1. +4
    -0
      tests/fixtures/config/job-output/git/common-config/playbooks/job-output-missing-role-include.yaml
  2. +5
    -0
      tests/fixtures/config/job-output/git/common-config/zuul.yaml
  3. +7
    -3
      tests/unit/test_v3.py
  4. +21
    -8
      zuul/executor/server.py

+ 4
- 0
tests/fixtures/config/job-output/git/common-config/playbooks/job-output-missing-role-include.yaml View File

@ -0,0 +1,4 @@
- hosts: all
tasks:
- include_role:
name: not_existing

+ 5
- 0
tests/fixtures/config/job-output/git/common-config/zuul.yaml View File

@ -30,6 +30,10 @@
name: job-output-missing-role
run: playbooks/job-output-missing-role.yaml
- job:
name: job-output-missing-role-include
run: playbooks/job-output-missing-role-include.yaml
- project:
name: org/project
check:
@ -47,3 +51,4 @@
check:
jobs:
- job-output-missing-role
- job-output-missing-role-include

+ 7
- 3
tests/unit/test_v3.py View File

@ -4360,11 +4360,15 @@ class TestJobOutput(AnsibleZuulTestCase):
self.assertHistory([
dict(name='job-output-missing-role', result='FAILURE',
changes='1,1'),
dict(name='job-output-missing-role-include', result='FAILURE',
changes='1,1'),
], ordered=False)
job_output = self._get_file(self.history[0],
'work/logs/job-output.txt')
self.assertIn('the role \'not_existing\' was not found', job_output)
for history in self.history:
job_output = self._get_file(history,
'work/logs/job-output.txt')
self.assertIn('the role \'not_existing\' was not found',
job_output)
def test_job_output_failure_log(self):
logger = logging.getLogger('zuul.AnsibleJob')


+ 21
- 8
zuul/executor/server.py View File

@ -1948,14 +1948,27 @@ class AnsibleJob(object):
now=datetime.datetime.now(),
line=line.decode('utf-8').rstrip()))
elif ret == 2:
# This is a workaround to detect winrm connection failures that are
# not detected by ansible. These can be detected if the string
# 'FATAL ERROR DURING FILE TRANSFER' is in the ansible output.
# In this case we should treat the host as unreachable and retry
# the job.
for line in syntax_buffer:
if b'FATAL ERROR DURING FILE TRANSFER' in line:
return self.RESULT_UNREACHABLE, None
with open(self.jobdir.job_output_file, 'a') as job_output:
found_marker = False
for line in syntax_buffer:
# This is a workaround to detect winrm connection failures
# that are not detected by ansible. These can be detected
# if the string 'FATAL ERROR DURING FILE TRANSFER' is in
# the ansible output. In this case we should treat the
# host as unreachable and retry the job.
if b'FATAL ERROR DURING FILE TRANSFER' in line:
return self.RESULT_UNREACHABLE, None
# Extract errors for special cases that are treated like
# task errors by Ansible (e.g. missing role when using
# 'include_role').
if line.startswith(b'ERROR!'):
found_marker = True
if not found_marker:
continue
job_output.write("{now} | {line}\n".format(
now=datetime.datetime.now(),
line=line.decode('utf-8').rstrip()))
return (self.RESULT_NORMAL, ret)


Loading…
Cancel
Save