From 526a7ed9af678439bc1ae5a24504d18b20df36b3 Mon Sep 17 00:00:00 2001 From: Simon Westphahl Date: Tue, 18 Sep 2018 08:23:58 +0200 Subject: [PATCH] Use merger to get list of files for pull-request The Github pull requests files API only returns at max the first 300 changed files of a PR in alphabetical order. In case the PR has more than 300 changed files, it might be that Zuul is not considering changes to the config and not using them speculatively due to the incomplete file list. Also jobs using file filters might not be considered in case the relevant files are not in the list of files for a change. Change-Id: I10a593e26ac85b8c12ca9c82051cad809382f50a --- tests/base.py | 9 +++++---- tests/fakegithub.py | 7 +++++-- tests/unit/test_github_driver.py | 13 +++++++++++++ zuul/driver/github/githubconnection.py | 23 +++++++++++++++++++++++ zuul/merger/merger.py | 13 ++++++++++--- 5 files changed, 56 insertions(+), 9 deletions(-) diff --git a/tests/base.py b/tests/base.py index d61cdca886..3cb2ce3569 100644 --- a/tests/base.py +++ b/tests/base.py @@ -980,11 +980,11 @@ class FakeGithubPullRequest(object): def _createPRRef(self): repo = self._getRepo() GithubChangeReference.create( - repo, self._getPRReference(), 'refs/tags/init') + repo, self.getPRReference(), 'refs/tags/init') def _addCommitToRepo(self, files=[], reset=False): repo = self._getRepo() - ref = repo.references[self._getPRReference()] + ref = repo.references[self.getPRReference()] if reset: self.number_of_commits = 0 ref.set_object('refs/tags/init') @@ -1007,6 +1007,7 @@ class FakeGithubPullRequest(object): repo.index.add([fn]) self.head_sha = repo.index.commit(msg).hexsha + repo.create_head(self.getPRReference(), self.head_sha, force=True) # Create an empty set of statuses for the given sha, # each sha on a PR may have a status set on it self.statuses[self.head_sha] = [] @@ -1020,7 +1021,7 @@ class FakeGithubPullRequest(object): def getPRHeadSha(self): repo = self._getRepo() - return repo.references[self._getPRReference()].commit.hexsha + return repo.references[self.getPRReference()].commit.hexsha def addReview(self, user, state, granted_on=None): gh_time_format = '%Y-%m-%dT%H:%M:%SZ' @@ -1056,7 +1057,7 @@ class FakeGithubPullRequest(object): 'submitted_at': submitted_at, }) - def _getPRReference(self): + def getPRReference(self): return '%s/head' % self.number def _getPullRequestEvent(self, action): diff --git a/tests/fakegithub.py b/tests/fakegithub.py index 86d680ce36..581b306863 100644 --- a/tests/fakegithub.py +++ b/tests/fakegithub.py @@ -250,8 +250,9 @@ class FakePull(object): return FakeIssue(self._fake_pull_request) def files(self): + # Github lists max. 300 files of a PR in alphabetical order return [FakeFile(fn) - for fn in self._fake_pull_request.files] + for fn in sorted(self._fake_pull_request.files)][:300] @property def head(self): @@ -284,12 +285,14 @@ class FakePull(object): 'state': pr.state, 'head': { 'sha': pr.head_sha, + 'ref': pr.getPRReference(), 'repo': { 'full_name': pr.project } }, 'merged': pr.is_merged, - 'body': pr.body + 'body': pr.body, + 'changed_files': len(pr.files), } return data diff --git a/tests/unit/test_github_driver.py b/tests/unit/test_github_driver.py index fb807a9947..c028798c83 100644 --- a/tests/unit/test_github_driver.py +++ b/tests/unit/test_github_driver.py @@ -101,6 +101,19 @@ class TestGithubDriver(ZuulTestCase): self.waitUntilSettled() self.assertEqual(1, len(self.history)) + @simple_layout('layouts/files-github.yaml', driver='github') + def test_pull_changed_files_length_mismatch(self): + files = {'{:03d}.txt'.format(n): 'test' for n in range(300)} + # File 301 which is not included in the list of files of the PR, + # since Github only returns max. 300 files in alphabetical order + files["foobar-requires"] = "test" + A = self.fake_github.openFakePullRequest( + 'org/project', 'master', 'A', files=files) + + self.fake_github.emitEvent(A.getPullRequestOpenedEvent()) + self.waitUntilSettled() + self.assertEqual(1, len(self.history)) + @simple_layout('layouts/basic-github.yaml', driver='github') def test_comment_event(self): A = self.fake_github.openFakePullRequest('org/project', 'master', 'A') diff --git a/zuul/driver/github/githubconnection.py b/zuul/driver/github/githubconnection.py index 81a35560b4..3297cc4f3c 100644 --- a/zuul/driver/github/githubconnection.py +++ b/zuul/driver/github/githubconnection.py @@ -864,12 +864,35 @@ class GithubConnection(BaseConnection): return changes + def getFilesChanges(self, project_name, head, base): + job = self.sched.merger.getFilesChanges(self.connection_name, + project_name, + head, base) + self.log.debug("Waiting for fileschanges job %s", job) + job.wait() + if not job.updated: + raise Exception("Fileschanges job {} failed".format(job)) + self.log.debug("Fileschanges job %s got changes on files %s", + job, job.files) + return job.files + def _updateChange(self, change): self.log.info("Updating %s" % (change,)) change.pr = self.getPull(change.project.name, change.number) change.ref = "refs/pull/%s/head" % change.number change.branch = change.pr.get('base').get('ref') change.files = change.pr.get('files') + # Github's pull requests files API only returns at max + # the first 300 changed files of a PR in alphabetical order. + # https://developer.github.com/v3/pulls/#list-pull-requests-files + if len(change.files) < change.pr.get('changed_files', 0): + self.log.warning("Got only %s files but PR has %s files.", + len(change.files), + change.pr.get('changed_files', 0)) + change.files = self.getFilesChanges( + change.project.name, + change.ref, + change.branch) change.title = change.pr.get('title') change.open = change.pr.get('state') == 'open' change.is_merged = change.pr.get('merged') diff --git a/zuul/merger/merger.py b/zuul/merger/merger.py index ab9a4ac0e7..157e6118b8 100644 --- a/zuul/merger/merger.py +++ b/zuul/merger/merger.py @@ -401,12 +401,19 @@ class Repo(object): def getFilesChanges(self, branch, tosha=None): repo = self.createRepoObject() + self.fetch(branch) + head = repo.commit(self.revParse('FETCH_HEAD')) files = set() - head = repo.heads[branch].commit + + merge_bases = [] + if tosha is not None: + merge_bases = repo.merge_base(head, tosha) + files.update(set(head.stats.files.keys())) - if tosha: + if merge_bases: + hexsha_list = [b.hexsha for b in merge_bases] for cmt in head.iter_parents(): - if cmt.hexsha == tosha: + if cmt.hexsha in hexsha_list: break files.update(set(cmt.stats.files.keys())) return list(files)