Report max 50 file comments to github

The github api limits us to report 50 file comments per api
request. Since the check run reporting is in the critical path don't
do paging but limit this to 50 comments. In order to get a diverse
feedback sort the messages by 'uniqueness' first so that 100 equal
comments don't push out 5 other comments that are valuable.

Change-Id: If5a7c246a58b510d8fcfa31bd71d74c84acbc278
This commit is contained in:
Tobias Henkel 2020-11-16 17:20:41 +01:00 committed by James E. Blair
parent ab9e808def
commit 808db0aec9
4 changed files with 177 additions and 2 deletions

View File

@ -0,0 +1,74 @@
- hosts: all
tasks:
- zuul_return:
data:
zuul:
file_comments:
bigfile:
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
# 10
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
# 20
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
# 30
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
# 40
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
# 50
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Annoying comment"}
- {line: 1, message: "Useful comment"}
- {line: 1, message: "Useful comment"}
- {line: 2, message: "Useful comment"}
- {line: 2, message: "Insightful comment"}

View File

@ -2,11 +2,19 @@
parent: base
name: file-comments
run: playbooks/file-comments.yaml
files: README
- job:
parent: base
name: file-comments-error
run: playbooks/file-comments-error.yaml
files: README
- job:
parent: base
name: file-comments-big
run: playbooks/file-comments-big.yaml
files: bigfile
- project:
name: org/project
@ -14,3 +22,4 @@
jobs:
- file-comments
- file-comments-error
- file-comments-big

View File

@ -2154,6 +2154,74 @@ class TestCheckRunAnnotations(ZuulGithubAppTestCase, AnsibleZuulTestCase):
"end_line": 3,
})
def test_many_file_comments(self):
# Test that we only send 50 comments to github
project = "org/project"
github = self.fake_github.getGithubClient(None)
# The file must be part of this PR to make the comment function
# work. Thus we change it's content to provide some more text.
files_dict = {
"bigfile": textwrap.dedent(
"""
section one
===========
here is some text
"""
),
}
A = self.fake_github.openFakePullRequest(
project, "master", "A", files=files_dict
)
self.fake_github.emitEvent(A.getPullRequestOpenedEvent())
self.waitUntilSettled()
# We should have a pending check for the head sha
self.assertIn(
A.head_sha, github.repo_from_project(project)._commits.keys())
check_runs = self.fake_github.getCommitChecks(project, A.head_sha)
self.assertEqual(1, len(check_runs))
check_run = check_runs[0]
self.assertEqual("tenant-one/check", check_run["name"])
self.assertEqual("completed", check_run["status"])
self.assertThat(
check_run["output"]["summary"],
MatchesRegex(r'.*Build succeeded.*', re.DOTALL)
)
annotations = check_run["output"]["annotations"]
self.assertEqual(50, len(annotations))
# Comments are sorted by uniqueness, so our most unique
# comment is first.
self.assertEqual(annotations[0], {
"path": "bigfile",
"annotation_level": "warning",
"message": "Insightful comment",
"start_line": 2,
"end_line": 2,
})
# This comment appears 3 times.
self.assertEqual(annotations[1], {
"path": "bigfile",
"annotation_level": "warning",
"message": "Useful comment",
"start_line": 1,
"end_line": 1,
})
# The rest.
self.assertEqual(annotations[4], {
"path": "bigfile",
"annotation_level": "warning",
"message": "Annoying comment",
"start_line": 1,
"end_line": 1,
})
class TestGithubDriverEnterise(ZuulGithubAppTestCase):
config_file = 'zuul-github-driver-enterprise.conf'

View File

@ -22,7 +22,8 @@ import queue
import threading
import time
import json
from collections import OrderedDict
from collections import OrderedDict, defaultdict
from itertools import chain
from json.decoder import JSONDecodeError
from typing import List, Optional
@ -2165,7 +2166,30 @@ class GithubConnection(CachedBranchConnection):
continue
annotations.append(annotation)
return annotations
def _sortUniqueness(annotations):
"""
This method is intended to spread the annotations by uniqueness
so we have an as diverse as possible list of annotations.
"""
# First group
per_message = defaultdict(list)
for annotation in annotations:
per_message[annotation['message']].append(annotation)
# Sort the lists by length. This way we get the messages that
# occur less frequently at first.
annotation_lists = sorted(list(per_message.values()),
key=lambda l: len(l))
return list(chain(*annotation_lists))
if len(annotations) > 50:
# We cannot report more than 50 file comments so sort them by
# uniqueness in order to give the user a diverse set of annotations
annotations = _sortUniqueness(annotations)
return annotations[:50]
def getPushedFileNames(self, event):
files = set()