Add config-error reporter and report config errors to DB

This adds a config-error pipeline reporter configuration option and
now also reports config errors and merge conflicts to the database
as buildset failures.

The driving use case is that if periodic pipelines encounter config
errors (such as being unable to freeze a job graph), they might send
email if configured to send email on merge conflicts, but otherwise
their results are not reported to the database.

To make this more visible, first we need Zuul pipelines to report
buildset ends to the database in more cases -- currently we typically
only report a buildset end if there are jobs (and so a buildset start),
or in some other special cases.  This change adds config errors and
merge conflicts to the set of cases where we report a buildset end.

Because of some shortcuts previously taken, that would end up reporting
a merge conflict message to the database instead of the actual error
message.  To resolve this, we add a new config-error reporter action
and adjust the config error reporter handling path to use it instead
of the merge-conflicts action.

Tests of this as well as the merge-conflicts code path are added.

Finally, a small debug aid is added to the GerritReporter so that we
can easily see in the logs which reporter action was used.

Change-Id: I805c26a88675bf15ae9d0d6c8999b178185e4f1f
This commit is contained in:
James E. Blair 2022-08-22 14:35:25 -07:00
parent 4c3dd064d7
commit 5ac9367b25
9 changed files with 129 additions and 10 deletions

View File

@ -332,9 +332,16 @@ success, the pipeline reports back to Gerrit with ``Verified`` vote of
.. attr:: merge-conflict
These reporters describe what Zuul should do if it is unable to
merge in the patchset. If no merge-conflict reporters are listed
then the ``failure`` reporters will be used to notify of
unsuccessful merges.
merge the patchset into the current state of the target
branch. If no merge-conflict reporters are listed then the
``failure`` reporters will be used.
.. attr:: config-error
These reporters describe what Zuul should do if it encounters a
configuration error while trying to enqueue the item. If no
config-error reporters are listed then the ``failure`` reporters
will be used.
.. attr:: enqueue

View File

@ -0,0 +1,6 @@
---
features:
- |
A new :attr:`pipeline.config-error` pipeline reporter is available
for customizing reporter actions related to Zuul configuration
errors.

View File

@ -0,0 +1,32 @@
- pipeline:
name: check
manager: independent
trigger:
gerrit:
- event: patchset-created
success:
gerrit:
Verified: 1
failure:
gerrit:
Verified: -1
- job:
name: base
parent: null
run: playbooks/base.yaml
- job:
name: project-test1
run: playbooks/project-test1.yaml
- job:
name: project-test2
run: playbooks/project-test2.yaml
- project:
name: org/project
check:
jobs:
- project-test2:
dependencies: project-test1

View File

@ -0,0 +1,26 @@
- pipeline:
name: periodic
manager: independent
trigger:
timer:
- time: '* * * * * */1'
- job:
name: base
parent: null
run: playbooks/base.yaml
- job:
name: project-test1
run: playbooks/project-test1.yaml
- job:
name: project-test2
run: playbooks/project-test2.yaml
- project:
name: org/project
periodic:
jobs:
- project-test2:
dependencies: project-test1

View File

@ -5343,6 +5343,11 @@ For CI problems and help debugging, contact ci@example.org"""
self.assertIn('Error merging gerrit/org/project', B.messages[0])
self.assertNotIn('logs.example.com', B.messages[0])
self.assertNotIn('SKIPPED', B.messages[0])
buildsets = list(
self.scheds.first.connections.connections[
'database'].getBuildsets())
self.assertEqual(buildsets[0].result, 'MERGE_CONFLICT')
self.assertIn('This change or one of', buildsets[0].message)
def test_submit_failure(self):
A = self.fake_gerrit.addFakeChange('org/project1', 'master', 'A')
@ -5357,6 +5362,33 @@ For CI problems and help debugging, contact ci@example.org"""
'database'].getBuildsets())
self.assertEqual(buildsets[0].result, 'MERGE_FAILURE')
@simple_layout('layouts/timer-freeze-job-failure.yaml')
def test_periodic_freeze_job_failure(self):
self.waitUntilSettled()
for x in iterate_timeout(30, 'buildset complete'):
buildsets = list(
self.scheds.first.connections.connections[
'database'].getBuildsets())
if buildsets:
break
self.assertEqual(buildsets[0].result, 'CONFIG_ERROR')
self.assertIn('Job project-test2 depends on project-test1 '
'which was not run', buildsets[0].message)
@simple_layout('layouts/freeze-job-failure.yaml')
def test_freeze_job_failure(self):
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
buildsets = list(
self.scheds.first.connections.connections[
'database'].getBuildsets())
self.assertEqual(buildsets[0].result, 'CONFIG_ERROR')
self.assertIn('Job project-test2 depends on project-test1 '
'which was not run', buildsets[0].message)
@simple_layout('layouts/nonvoting-pipeline.yaml')
def test_nonvoting_pipeline(self):
"Test that a nonvoting pipeline (experimental) can still report"

View File

@ -1176,6 +1176,7 @@ class PipelineParser(object):
'success': 'success_actions',
'failure': 'failure_actions',
'merge-conflict': 'merge_conflict_actions',
'config-error': 'config_error_actions',
'no-jobs': 'no_jobs_actions',
'disabled': 'disabled_actions',
'dequeue': 'dequeue_actions',
@ -1250,7 +1251,7 @@ class PipelineParser(object):
pipeline['trigger'] = vs.Required(self.getDriverSchema('trigger'))
for action in ['enqueue', 'start', 'success', 'failure',
'merge-conflict', 'merge-failure', 'no-jobs',
'disabled', 'dequeue']:
'disabled', 'dequeue', 'config-error']:
pipeline[action] = self.getDriverSchema('reporter')
return vs.Schema(pipeline)
@ -1318,6 +1319,10 @@ class PipelineParser(object):
if not pipeline.merge_conflict_actions:
pipeline.merge_conflict_actions = pipeline.failure_actions
# If config-error actions aren't explicit, use the failure actions
if not pipeline.config_error_actions:
pipeline.config_error_actions = pipeline.failure_actions
pipeline.disable_at = conf.get(
'disable-after-consecutive-failures', None)

View File

@ -36,6 +36,9 @@ class GerritReporter(BaseReporter):
self._checks_api = action.pop('checks-api', None)
self._labels = action
def __repr__(self):
return f"<GerritReporter: {self._action}>"
def report(self, item, phase1=True, phase2=True):
"""Send a message to gerrit."""
log = get_annotated_logger(self.log, item.event)

View File

@ -322,9 +322,10 @@ class PipelineManager(metaclass=ABCMeta):
(item, ret))
def reportNormalBuildsetEnd(self, build_set, action, final, result=None):
# Report a buildset end, but only if there are jobs
if (build_set.job_graph and
len(build_set.job_graph.jobs) > 0):
# Report a buildset end if there are jobs or errors
if ((build_set.job_graph and len(build_set.job_graph.jobs) > 0) or
build_set.config_errors or
build_set.unable_to_merge):
self.sql.reportBuildsetEnd(build_set, action,
final, result)
@ -2013,9 +2014,8 @@ class PipelineManager(metaclass=ABCMeta):
item.setReportedResult('NO_JOBS')
elif item.getConfigErrors():
log.debug("Invalid config for change %s", item.change)
# TODOv3(jeblair): consider a new reporter action for this
action = 'merge-conflict'
actions = self.pipeline.merge_conflict_actions
action = 'config-error'
actions = self.pipeline.config_error_actions
item.setReportedResult('CONFIG_ERROR')
elif item.didMergerFail():
log.debug("Merge conflict")

View File

@ -135,6 +135,7 @@ class BaseReporter(object, metaclass=abc.ABCMeta):
'failure': self._formatItemReportFailure,
'merge-conflict': self._formatItemReportMergeConflict,
'merge-failure': self._formatItemReportMergeFailure,
'config-error': self._formatItemReportConfigError,
'no-jobs': self._formatItemReportNoJobs,
'disabled': self._formatItemReportDisabled,
'dequeue': self._formatItemReportDequeue,
@ -222,6 +223,13 @@ class BaseReporter(object, metaclass=abc.ABCMeta):
def _formatItemReportMergeFailure(self, item, with_jobs=True):
return 'This change was not merged by the code review system.\n'
def _formatItemReportConfigError(self, item, with_jobs=True):
if item.getConfigErrors():
msg = str(item.getConfigErrors()[0].error)
else:
msg = "Unknown configuration error"
return msg
def _formatItemReportNoJobs(self, item, with_jobs=True):
status_url = get_default(self.connection.sched.config,
'web', 'status_url', '')