Browse Source

Fix error reporting for special task failures

For some tasks, the Ansible log does not contain enough information to
debug failures (e.g. a missing role when using include_role).

Ansible does not treat these issues as an error (exit code 1) but as a
failed task, which results in an exit code of 2.

Change-Id: Iea754814e3d55be6be1c2de7f2d45ceda757f480
Simon Westphahl 2 months ago
parent
commit
6e424878ae

+ 4
- 0
tests/fixtures/config/job-output/git/common-config/playbooks/job-output-missing-role-include.yaml View File

@@ -0,0 +1,4 @@
1
+- hosts: all
2
+  tasks:
3
+    - include_role:
4
+        name: not_existing

+ 5
- 0
tests/fixtures/config/job-output/git/common-config/zuul.yaml View File

@@ -30,6 +30,10 @@
30 30
     name: job-output-missing-role
31 31
     run: playbooks/job-output-missing-role.yaml
32 32
 
33
+- job:
34
+    name: job-output-missing-role-include
35
+    run: playbooks/job-output-missing-role-include.yaml
36
+
33 37
 - project:
34 38
     name: org/project
35 39
     check:
@@ -47,3 +51,4 @@
47 51
     check:
48 52
       jobs:
49 53
         - job-output-missing-role
54
+        - job-output-missing-role-include

+ 7
- 3
tests/unit/test_v3.py View File

@@ -4360,11 +4360,15 @@ class TestJobOutput(AnsibleZuulTestCase):
4360 4360
         self.assertHistory([
4361 4361
             dict(name='job-output-missing-role', result='FAILURE',
4362 4362
                  changes='1,1'),
4363
+            dict(name='job-output-missing-role-include', result='FAILURE',
4364
+                 changes='1,1'),
4363 4365
         ], ordered=False)
4364 4366
 
4365
-        job_output = self._get_file(self.history[0],
4366
-                                    'work/logs/job-output.txt')
4367
-        self.assertIn('the role \'not_existing\' was not found', job_output)
4367
+        for history in self.history:
4368
+            job_output = self._get_file(history,
4369
+                                        'work/logs/job-output.txt')
4370
+            self.assertIn('the role \'not_existing\' was not found',
4371
+                          job_output)
4368 4372
 
4369 4373
     def test_job_output_failure_log(self):
4370 4374
         logger = logging.getLogger('zuul.AnsibleJob')

+ 21
- 8
zuul/executor/server.py View File

@@ -1948,14 +1948,27 @@ class AnsibleJob(object):
1948 1948
                         now=datetime.datetime.now(),
1949 1949
                         line=line.decode('utf-8').rstrip()))
1950 1950
         elif ret == 2:
1951
-            # This is a workaround to detect winrm connection failures that are
1952
-            # not detected by ansible. These can be detected if the string
1953
-            # 'FATAL ERROR DURING FILE TRANSFER' is in the ansible output.
1954
-            # In this case we should treat the host as unreachable and retry
1955
-            # the job.
1956
-            for line in syntax_buffer:
1957
-                if b'FATAL ERROR DURING FILE TRANSFER' in line:
1958
-                    return self.RESULT_UNREACHABLE, None
1951
+            with open(self.jobdir.job_output_file, 'a') as job_output:
1952
+                found_marker = False
1953
+                for line in syntax_buffer:
1954
+                    # This is a workaround to detect winrm connection failures
1955
+                    # that are not detected by ansible. These can be detected
1956
+                    # if the string 'FATAL ERROR DURING FILE TRANSFER' is in
1957
+                    # the ansible output. In this case we should treat the
1958
+                    # host as unreachable and retry the job.
1959
+                    if b'FATAL ERROR DURING FILE TRANSFER' in line:
1960
+                        return self.RESULT_UNREACHABLE, None
1961
+
1962
+                    # Extract errors for special cases that are treated like
1963
+                    # task errors by Ansible (e.g. missing role when using
1964
+                    # 'include_role').
1965
+                    if line.startswith(b'ERROR!'):
1966
+                        found_marker = True
1967
+                    if not found_marker:
1968
+                        continue
1969
+                    job_output.write("{now} | {line}\n".format(
1970
+                        now=datetime.datetime.now(),
1971
+                        line=line.decode('utf-8').rstrip()))
1959 1972
 
1960 1973
         return (self.RESULT_NORMAL, ret)
1961 1974
 

Loading…
Cancel
Save