From 83281e33565dfe872c3b82bb4a80733971ff216e Mon Sep 17 00:00:00 2001 From: Simon Westphahl Date: Mon, 21 Sep 2020 16:12:26 +0200 Subject: [PATCH] Fetch can-merge info when updating a pull-request Instead of synchronously checking via the GitHub API if a pull request is mergeable, we can already get the necessary information during Github event processing so the canMerge() check operates on cached data similar to how this is done for the Gerrit driver already. With this we can also remove the additional API requests for the commit status and check runs since this info can be retrieved via the same GraphQL request. Change-Id: I6b4848dcb5d27071deeb6a59642c0e3c03a799da --- tests/fake_graphql.py | 31 ++++- tests/fakegithub.py | 27 ++++- zuul/driver/github/githubconnection.py | 110 ++++++++---------- zuul/driver/github/githubmodel.py | 21 ++++ zuul/driver/github/graphql/__init__.py | 8 +- .../github/graphql/canmerge-legacy.graphql | 7 ++ zuul/driver/github/graphql/canmerge.graphql | 7 ++ 7 files changed, 142 insertions(+), 69 deletions(-) diff --git a/tests/fake_graphql.py b/tests/fake_graphql.py index 9e86f81adc..6c75679100 100644 --- a/tests/fake_graphql.py +++ b/tests/fake_graphql.py @@ -72,9 +72,14 @@ class FakeBranchProtectionRules(ObjectType): return parent.values() +class FakeActor(ObjectType): + login = String() + + class FakeStatusContext(ObjectType): state = String() context = String() + creator = Field(FakeActor) def resolve_state(parent, info): state = parent.state.upper() @@ -83,6 +88,9 @@ class FakeStatusContext(ObjectType): def resolve_context(parent, info): return parent.context + def resolve_creator(parent, info): + return parent.creator + class FakeStatus(ObjectType): contexts = List(FakeStatusContext) @@ -99,7 +107,9 @@ class FakeCheckRun(ObjectType): return parent.name def resolve_conclusion(parent, info): - return parent.conclusion.upper() + if parent.conclusion: + return parent.conclusion.upper() + return None class FakeCheckRuns(ObjectType): @@ -109,11 +119,28 @@ class FakeCheckRuns(ObjectType): return parent +class FakeApp(ObjectType): + slug = String() + name = String() + + class FakeCheckSuite(ObjectType): + app = Field(FakeApp) checkRuns = Field(FakeCheckRuns, first=Int()) + def resolve_app(parent, info): + if not parent: + return None + return parent[0].app + def resolve_checkRuns(parent, info, first=None): - return parent + # We only want to return the latest result for a check run per app. + # Since the check runs are ordered from latest to oldest result we + # need to traverse the list in reverse order. + check_runs_by_name = { + "{}:{}".format(cr.app, cr.name): cr for cr in reversed(parent) + } + return check_runs_by_name.values() class FakeCheckSuites(ObjectType): diff --git a/tests/fakegithub.py b/tests/fakegithub.py index 89b3ec73ab..7caf5377d8 100644 --- a/tests/fakegithub.py +++ b/tests/fakegithub.py @@ -65,13 +65,18 @@ class FakeBranch(object): } +class FakeCreator: + def __init__(self, login): + self.login = login + + class FakeStatus(object): def __init__(self, state, url, description, context, user): self.state = state self.context = context + self.creator = FakeCreator(user) self._url = url self._description = description - self._user = user def as_dict(self): return { @@ -80,11 +85,17 @@ class FakeStatus(object): 'description': self._description, 'context': self.context, 'creator': { - 'login': self._user + 'login': self.creator.login } } +class FakeApp: + def __init__(self, name, slug): + self.name = name + self.slug = slug + + class FakeCheckRun(object): def __init__(self, name, details_url, output, status, conclusion, completed_at, external_id, actions, app): @@ -98,7 +109,7 @@ class FakeCheckRun(object): self.completed_at = completed_at self.external_id = external_id self.actions = actions - self.app = app + self.app = FakeApp(name=app, slug=app) # Github automatically sets the status to "completed" if a conclusion # is provided. @@ -118,7 +129,8 @@ class FakeCheckRun(object): "external_id": self.external_id, "actions": self.actions, "app": { - "slug": self.app, + "slug": self.app.slug, + "name": self.app.name, }, } @@ -719,6 +731,13 @@ class FakeGithubClient(object): def pull_request(self, owner, project, number): fake_pr = self._data.pull_requests[int(number)] + repo = self.repository(owner, project) + # Ensure a commit for the head_sha exists so this can be resolved in + # graphql queries. + repo._commits.setdefault( + fake_pr.head_sha, + FakeCommit(fake_pr.head_sha) + ) return FakePull(fake_pr) def search_issues(self, query): diff --git a/zuul/driver/github/githubconnection.py b/zuul/driver/github/githubconnection.py index 0c72b05647..8f306ee8fc 100644 --- a/zuul/driver/github/githubconnection.py +++ b/zuul/driver/github/githubconnection.py @@ -1458,8 +1458,6 @@ class GithubConnection(BaseConnection): if not change.is_merged: change.is_merged = change.pr.get('merged') - change.status = self._get_statuses( - change.project, change.patchset, event) change.reviews = self.getPullReviews( pr_obj, change.project, change.number, event) change.labels = change.pr.get('labels') @@ -1492,11 +1490,38 @@ class GithubConnection(BaseConnection): self.server, change.project.name, change.number), ] + self._updateCanMergeInfo(change, event) + if self.sched: self.sched.onChangeUpdated(change, event) return change + def _updateCanMergeInfo(self, change, event): + # NOTE: The 'mergeable' field may get a false (null) while GitHub is + # calculating if it can merge. The Github API will just return + # that as false. This could lead to false negatives. So don't get this + # field here and only evaluate branch protection settings. Any merge + # conflicts which would block merging finally will be detected by + # the zuul-mergers anyway. + github = self.getGithubClient(change.project.name, zuul_event_id=event) + + # Append accept headers so we get the draft status and checks api + self._append_accept_header(github, PREVIEW_DRAFT_ACCEPT) + self._append_accept_header(github, PREVIEW_CHECKS_ACCEPT) + + # For performance reasons fetch all needed data upfront using a + # single graphql call. + canmerge_data = self.graphql_client.fetch_canmerge( + github, change, zuul_event_id=event) + + change.contexts = self._get_contexts(canmerge_data) + change.draft = canmerge_data.get('isDraft', False) + change.review_decision = canmerge_data['reviewDecision'] + change.required_contexts = set( + canmerge_data['requiredStatusCheckContexts'] + ) + def getGitUrl(self, project: Project): if self.git_ssh_key: return 'ssh://git@%s/%s.git' % (self.server, project.name) @@ -1638,40 +1663,21 @@ class GithubConnection(BaseConnection): return (pr, probj) def canMerge(self, change, allow_needs, event=None): - # NOTE: The mergeable call may get a false (null) while GitHub is - # calculating if it can merge. The github3.py library will just return - # that as false. This could lead to false negatives. So don't do this - # call here and only evaluate branch protection settings. Any merge - # conflicts which would block merging finally will be detected by - # the zuul-mergers anyway. - log = get_annotated_logger(self.log, event) - github = self.getGithubClient(change.project.name, zuul_event_id=event) - - # Append accept headers so we get the draft status and checks api - self._append_accept_header(github, PREVIEW_DRAFT_ACCEPT) - self._append_accept_header(github, PREVIEW_CHECKS_ACCEPT) - - # For performance reasons fetch all needed data for canMerge upfront - # using a single graphql call. - canmerge_data = self.graphql_client.fetch_canmerge( - github, change, zuul_event_id=event) - # If the PR is a draft it cannot be merged. - if canmerge_data.get('isDraft', False): + if change.draft: log.debug('Change %s can not merge because it is a draft', change) return False missing_status_checks = self._getMissingStatusChecks( - allow_needs, canmerge_data) + change, allow_needs) if missing_status_checks: log.debug('Change %s can not merge because required status checks ' 'are missing: %s', change, missing_status_checks) return False - review_decision = canmerge_data['reviewDecision'] - if review_decision and review_decision != 'APPROVED': + if change.review_decision and change.review_decision != 'APPROVED': # If we got a review decision it must be approved log.debug('Change %s can not merge because it is not approved', change) @@ -1788,23 +1794,19 @@ class GithubConnection(BaseConnection): return resp.json() @staticmethod - def _getMissingStatusChecks(allow_needs, canmerge_data): - required_contexts = canmerge_data['requiredStatusCheckContexts'] - if not required_contexts: + def _getMissingStatusChecks(change, allow_needs): + if not change.required_contexts: # There are no required contexts -> ok by definition return set() # Strip allow_needs as we will set this in the gate ourselves required_contexts = set( - [x for x in required_contexts if x not in allow_needs]) - - # Get successful statuses - successful = set([s[0] for s in canmerge_data['status'].items() - if s[1] == 'SUCCESS']) + x for x in change.required_contexts if x not in allow_needs + ) # Remove successful checks from the required contexts to get the # remaining missing required status. - return required_contexts.difference(successful) + return required_contexts.difference(change.successful_contexts) @cachetools.cached(cache=cachetools.TTLCache(maxsize=2048, ttl=3600), key=lambda self, login, project: @@ -2201,34 +2203,14 @@ class GithubConnection(BaseConnection): def _ghTimestampToDate(self, timestamp): return time.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ') - def _get_statuses(self, project, sha, event): - # A ref can have more than one status from each context, - # however the API returns them in order, newest first. - # So we can keep track of which contexts we've already seen - # and throw out the rest. Our unique key is based on - # the user and the context, since context is free form and anybody - # can put whatever they want there. We want to ensure we track it - # by user, so that we can require/trigger by user too. - seen = [] - statuses = [] - for status in self.getCommitStatuses(project.name, sha, event): - stuple = _status_as_tuple(status) - if "%s:%s" % (stuple[0], stuple[1]) not in seen: - statuses.append("%s:%s:%s" % stuple) - seen.append("%s:%s" % (stuple[0], stuple[1])) - - # Although Github differentiates commit statuses and commit checks via - # their respective APIs, the branch protection the status section - # (below the comments of a PR) do not differentiate between both. Thus, - # to mimic this behaviour also in Zuul, a required_status in the - # pipeline config could map to either a status or a check. - for check in self.getCommitChecks(project.name, sha, event): - ctuple = _check_as_tuple(check) - if "{}:{}".format(ctuple[0], ctuple[1]) not in seen: - statuses.append("{}:{}:{}".format(*ctuple)) - seen.append("{}:{}".format(ctuple[0], ctuple[1])) - - return statuses + def _get_contexts(self, canmerge_data): + contexts = set( + _status_as_tuple(s) for s in canmerge_data["status"].values() + ) + contexts.update(set( + _check_as_tuple(c) for c in canmerge_data["checks"].values() + )) + return contexts def getWebController(self, zuul_web): return GithubWebController(zuul_web, self) @@ -2350,6 +2332,9 @@ def _status_as_tuple(status): user = creator.get('login') context = status.get('context') state = status.get('state') + # Normalize state to lowercase as the Graphql and REST API are not + # consistent in this regard. + state = state.lower() if state else state return (user, context, state) @@ -2365,4 +2350,7 @@ def _check_as_tuple(check): slug = "Unknown" name = check.get("name") conclusion = check.get("conclusion") + # Normalize conclusion to lowercase as the Graphql and REST API are not + # consistent in this regard. + conclusion = conclusion.lower() if conclusion else conclusion return (slug, name, conclusion) diff --git a/zuul/driver/github/githubmodel.py b/zuul/driver/github/githubmodel.py index f5945a6c63..8463d07f4d 100644 --- a/zuul/driver/github/githubmodel.py +++ b/zuul/driver/github/githubmodel.py @@ -38,6 +38,27 @@ class PullRequest(Change): self.reviews = [] self.files = [] self.labels = [] + self.draft = None + self.review_decision = None + self.required_contexts = set() + self.contexts = set() + + @property + def status(self): + return ["{}:{}:{}".format(*c) for c in self.contexts] + + @status.setter + def status(self, value): + # Dummy setter to ignore init to None in base class. + pass + + @property + def successful_contexts(self) -> set: + if not self.contexts: + return set() + return set( + s[1] for s in self.contexts if s[2] == 'success' + ) def isUpdateOf(self, other): if (self.project == other.project and diff --git a/zuul/driver/github/graphql/__init__.py b/zuul/driver/github/graphql/__init__.py index 1704d7efca..d671e9e90c 100644 --- a/zuul/driver/github/graphql/__init__.py +++ b/zuul/driver/github/graphql/__init__.py @@ -126,11 +126,15 @@ class GraphQLClient: # afterwards status = commit.get('status') or {} for context in status.get('contexts', []): - result['status'][context['context']] = context['state'] + result['status'][context['context']] = context # Add check runs + result['checks'] = {} for suite in nested_get(commit, 'checkSuites', 'nodes', default=[]): for run in nested_get(suite, 'checkRuns', 'nodes', default=[]): - result['status'][run['name']] = run['conclusion'] + result['checks'][run['name']] = { + **run, + "app": suite.get("app") + } return result diff --git a/zuul/driver/github/graphql/canmerge-legacy.graphql b/zuul/driver/github/graphql/canmerge-legacy.graphql index edc1b0fcd7..e86b3c7def 100644 --- a/zuul/driver/github/graphql/canmerge-legacy.graphql +++ b/zuul/driver/github/graphql/canmerge-legacy.graphql @@ -25,6 +25,10 @@ query canMergeData( ... on Commit { checkSuites(first: 100) { nodes { + app { + name + slug + } checkRuns(first: 100) { nodes { name @@ -35,6 +39,9 @@ query canMergeData( } status { contexts { + creator { + login + } state context } diff --git a/zuul/driver/github/graphql/canmerge.graphql b/zuul/driver/github/graphql/canmerge.graphql index baa4a9f814..7b7e862fdd 100644 --- a/zuul/driver/github/graphql/canmerge.graphql +++ b/zuul/driver/github/graphql/canmerge.graphql @@ -26,6 +26,10 @@ query canMergeData( ... on Commit { checkSuites(first: 100) { nodes { + app { + name + slug + } checkRuns(first: 100) { nodes { name @@ -36,6 +40,9 @@ query canMergeData( } status { contexts { + creator { + login + } state context }