Report max 50 file comments to github

The github api limits us to report 50 file comments per api request. Since the check run reporting is in the critical path don't do paging but limit this to 50 comments. In order to get a diverse feedback sort the messages by 'uniqueness' first so that 100 equal comments don't push out 5 other comments that are valuable. Change-Id: If5a7c246a58b510d8fcfa31bd71d74c84acbc278
2020-11-16 17:20:41 +01:00 · 2020-11-16 17:20:41 +01:00 · 808db0aec9
parent ab9e808def
commit 808db0aec9
4 changed files with 177 additions and 2 deletions
--- a/tests/fixtures/config/github-file-comments/git/org_project/playbooks/file-comments-big.yaml
+++ b/tests/fixtures/config/github-file-comments/git/org_project/playbooks/file-comments-big.yaml
@ -0,0 +1,74 @@
+- hosts: all
+  tasks:
+    - zuul_return:
+        data:
+          zuul:
+            file_comments:
+              bigfile:
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                # 10
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                # 20
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                # 30
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                # 40
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                # 50
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Annoying comment"}
+                - {line: 1, message: "Useful comment"}
+                - {line: 1, message: "Useful comment"}
+                - {line: 2, message: "Useful comment"}
+                - {line: 2, message: "Insightful comment"}
--- a/tests/fixtures/config/github-file-comments/git/org_project/zuul.yaml
+++ b/tests/fixtures/config/github-file-comments/git/org_project/zuul.yaml
@ -2,11 +2,19 @@
    parent: base
    name: file-comments
    run: playbooks/file-comments.yaml
+    files: README

 - job:
    parent: base
    name: file-comments-error
    run: playbooks/file-comments-error.yaml
+    files: README
+
+- job:
+    parent: base
+    name: file-comments-big
+    run: playbooks/file-comments-big.yaml
+    files: bigfile

 - project:
    name: org/project
@ -14,3 +22,4 @@
      jobs:
        - file-comments
        - file-comments-error
+        - file-comments-big
--- a/tests/unit/test_github_driver.py
+++ b/tests/unit/test_github_driver.py
@ -2154,6 +2154,74 @@ class TestCheckRunAnnotations(ZuulGithubAppTestCase, AnsibleZuulTestCase):
            "end_line": 3,
        })

+    def test_many_file_comments(self):
+        # Test that we only send 50 comments to github
+        project = "org/project"
+        github = self.fake_github.getGithubClient(None)
+
+        # The file must be part of this PR to make the comment function
+        # work. Thus we change it's content to provide some more text.
+        files_dict = {
+            "bigfile": textwrap.dedent(
+                """
+                section one
+                ===========
+
+                here is some text
+                """
+            ),
+        }
+
+        A = self.fake_github.openFakePullRequest(
+            project, "master", "A", files=files_dict
+        )
+        self.fake_github.emitEvent(A.getPullRequestOpenedEvent())
+        self.waitUntilSettled()
+
+        # We should have a pending check for the head sha
+        self.assertIn(
+            A.head_sha, github.repo_from_project(project)._commits.keys())
+        check_runs = self.fake_github.getCommitChecks(project, A.head_sha)
+
+        self.assertEqual(1, len(check_runs))
+        check_run = check_runs[0]
+
+        self.assertEqual("tenant-one/check", check_run["name"])
+        self.assertEqual("completed", check_run["status"])
+        self.assertThat(
+            check_run["output"]["summary"],
+            MatchesRegex(r'.*Build succeeded.*', re.DOTALL)
+        )
+
+        annotations = check_run["output"]["annotations"]
+        self.assertEqual(50, len(annotations))
+
+        # Comments are sorted by uniqueness, so our most unique
+        # comment is first.
+        self.assertEqual(annotations[0], {
+            "path": "bigfile",
+            "annotation_level": "warning",
+            "message": "Insightful comment",
+            "start_line": 2,
+            "end_line": 2,
+        })
+        # This comment appears 3 times.
+        self.assertEqual(annotations[1], {
+            "path": "bigfile",
+            "annotation_level": "warning",
+            "message": "Useful comment",
+            "start_line": 1,
+            "end_line": 1,
+        })
+        # The rest.
+        self.assertEqual(annotations[4], {
+            "path": "bigfile",
+            "annotation_level": "warning",
+            "message": "Annoying comment",
+            "start_line": 1,
+            "end_line": 1,
+        })
+

 class TestGithubDriverEnterise(ZuulGithubAppTestCase):
    config_file = 'zuul-github-driver-enterprise.conf'
--- a/zuul/driver/github/githubconnection.py
+++ b/zuul/driver/github/githubconnection.py
@ -22,7 +22,8 @@ import queue
 import threading
 import time
 import json
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
+from itertools import chain
 from json.decoder import JSONDecodeError
 from typing import List, Optional

@ -2165,7 +2166,30 @@ class GithubConnection(CachedBranchConnection):
                    continue

                annotations.append(annotation)
-        return annotations
+
+        def _sortUniqueness(annotations):
+            """
+            This method is intended to spread the annotations by uniqueness
+            so we have an as diverse as possible list of annotations.
+            """
+            # First group
+            per_message = defaultdict(list)
+            for annotation in annotations:
+                per_message[annotation['message']].append(annotation)
+
+            # Sort the lists by length. This way we get the messages that
+            # occur less frequently at first.
+            annotation_lists = sorted(list(per_message.values()),
+                                      key=lambda l: len(l))
+
+            return list(chain(*annotation_lists))
+
+        if len(annotations) > 50:
+            # We cannot report more than 50 file comments so sort them by
+            # uniqueness in order to give the user a diverse set of annotations
+            annotations = _sortUniqueness(annotations)
+
+        return annotations[:50]

    def getPushedFileNames(self, event):
        files = set()