Add pagination to Github graphql canmerge query

This adds pagination support to the Github graphql canmerge query.

There are three items that could theoretically be paginated:

* The list of branch protection rules
* The list of check suites for the commit
* The list of check runs for each suite

We can get 100 of each of these in one query, which works most of the
time, but if these limits are exceeded, we may have incomplete
information and may not be able to determine whether a change is
mergeable.

To account for that, the canmerge query is updated to support
pagination on these three items.  Like the paginated branch protection
rule query, this uses Github global object ids to perform simpler
queries for the second and later pages.

The fake graphql server has been extended to support the additional
interfaces and connections.

One notable quirk: even though everything in the API suggests that it
should be possible to have more than one check run with the same name,
Github itself appears to discard older check runs with the same name
and only returns a single check run (the latest) for a given name.
The existing test infrastructure reflected that, so comments
clarifying the assumption have been added.

The "legacy" canmerge query corresponded to Github Enterprise < 2.21.
That was deprecated on 2021-06-09 and is no longer supported.  To
simplify the implementation, support for the legacy canmerge query is
removed.

Change-Id: I997ebd731bbb502c636580b7e13b4f2940f1be6d
This commit is contained in:
James E. Blair
2024-09-14 15:08:40 -07:00
parent cddc0e096e
commit 108977ca47
10 changed files with 357 additions and 178 deletions

View File

@@ -0,0 +1,4 @@
---
upgrade:
- |
Github Enterprise versions older than 2.21 are no longer supported.

View File

@@ -1,4 +1,5 @@
# Copyright 2019 BMW Group
# Copyright 2024 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@@ -12,7 +13,16 @@
# License for the specific language governing permissions and limitations
# under the License.
from graphene import Boolean, Field, Int, List, ObjectType, String, Schema
from graphene import (
Boolean,
Field,
Int,
Interface,
List,
ObjectType,
Schema,
String,
)
class ID(String):
@@ -20,18 +30,32 @@ class ID(String):
pass
class PageInfo(ObjectType):
end_cursor = String()
has_previous_page = Boolean()
has_next_page = Boolean()
class Node(Interface):
id = ID(required=True)
def resolve_end_cursor(parent, info):
@classmethod
def resolve_type(cls, instance, info):
kind = getattr(instance, '_graphene_type', None)
if kind == 'BranchProtectionRule':
return BranchProtectionRule
elif kind == 'Commit':
return Commit
elif kind == 'CheckSuite':
return CheckSuite
class PageInfo(ObjectType):
endCursor = String()
hasPreviousPage = Boolean()
hasNextPage = Boolean()
def resolve_endCursor(parent, info):
return str(parent['after'] + parent['first'])
def resolve_has_previous_page(parent, info):
def resolve_hasPreviousPage(parent, info):
return parent['after'] > 0
def resolve_has_next_page(parent, info):
def resolve_hasNextPage(parent, info):
return parent['after'] + parent['first'] < parent['length']
@@ -54,12 +78,17 @@ class MatchingRefs(ObjectType):
class BranchProtectionRule(ObjectType):
id = ID()
class Meta:
interfaces = (Node, )
id = ID(required=True)
pattern = String()
requiredStatusCheckContexts = List(String)
requiresApprovingReviews = Boolean()
requiresCodeOwnerReviews = Boolean()
matchingRefs = Field(MatchingRefs, first=Int(), after=String())
matchingRefs = Field(MatchingRefs, first=Int(), after=String(),
query=String())
lockBranch = Boolean()
def resolve_id(parent, info):
@@ -80,11 +109,13 @@ class BranchProtectionRule(ObjectType):
def resolve_lockBranch(parent, info):
return parent.lock_branch
def resolve_matchingRefs(parent, info, first, after=None):
def resolve_matchingRefs(parent, info, first, after=None, query=None):
if after is None:
after = '0'
after = int(after)
values = parent.matching_refs
if query:
values = [v for v in values if v == query]
return dict(
length=len(values),
nodes=values[after:after + first],
@@ -132,6 +163,11 @@ class Status(ObjectType):
class CheckRun(ObjectType):
class Meta:
interfaces = (Node, )
id = ID(required=True)
name = String()
conclusion = String()
@@ -146,8 +182,12 @@ class CheckRun(ObjectType):
class CheckRuns(ObjectType):
nodes = List(CheckRun)
pageInfo = Field(PageInfo)
def resolve_nodes(parent, info):
return parent['nodes']
def resolve_pageInfo(parent, info):
return parent
@@ -157,43 +197,61 @@ class App(ObjectType):
class CheckSuite(ObjectType):
class Meta:
interfaces = (Node, )
id = ID(required=True)
app = Field(App)
checkRuns = Field(CheckRuns, first=Int())
checkRuns = Field(CheckRuns, first=Int(), after=String())
def resolve_app(parent, info):
if not parent:
if not parent.runs:
return None
return parent[0].app
return parent.runs[0].app
def resolve_checkRuns(parent, info, first=None):
# We only want to return the latest result for a check run per app.
def resolve_checkRuns(parent, info, first, after=None):
# Github will only return the single most recent check run for
# a given name (this is true for REST or graphql), despite the
# format of the result being a list.
# Since the check runs are ordered from latest to oldest result we
# need to traverse the list in reverse order.
check_runs_by_name = {
"{}:{}".format(cr.app, cr.name): cr for cr in reversed(parent)
"{}:{}".format(cr.app.name, cr.name):
cr for cr in reversed(parent.runs)
}
return check_runs_by_name.values()
if after is None:
after = '0'
after = int(after)
values = list(check_runs_by_name.values())
return dict(
length=len(values),
nodes=values[after:after + first],
first=first,
after=after,
)
class CheckSuites(ObjectType):
nodes = List(CheckSuite)
pageInfo = Field(PageInfo)
def resolve_nodes(parent, info):
# Note: we only use a single check suite in the tests so return a
# single item to keep it simple.
return [parent]
return parent['nodes']
def resolve_pageInfo(parent, info):
return parent
class Commit(ObjectType):
class Meta:
# Graphql object type that defaults to the class name, but we require
# 'Commit'.
name = 'Commit'
interfaces = (Node, )
id = ID(required=True)
status = Field(Status)
checkSuites = Field(CheckSuites, first=Int())
checkSuites = Field(CheckSuites, first=Int(), after=String())
def resolve_status(parent, info):
seen = set()
@@ -205,9 +263,18 @@ class Commit(ObjectType):
# Github returns None if there are no results
return result or None
def resolve_checkSuites(parent, info, first=None):
# Tests only utilize one check suite so return all runs for that.
return parent._check_runs
def resolve_checkSuites(parent, info, first, after=None):
if after is None:
after = '0'
after = int(after)
# Each value is a list of check runs for that suite
values = list(parent._check_suites.values())
return dict(
length=len(values),
nodes=values[after:after + first],
first=first,
after=after,
)
class PullRequest(ObjectType):
@@ -275,7 +342,7 @@ class Repository(ObjectType):
class FakeGithubQuery(ObjectType):
repository = Field(Repository, owner=String(required=True),
name=String(required=True))
node = Field(BranchProtectionRule, id=ID(required=True))
node = Field(Node, id=ID(required=True))
def resolve_repository(root, info, owner, name):
return info.context._data.repos.get((owner, name))
@@ -285,6 +352,12 @@ class FakeGithubQuery(ObjectType):
for rule in repo._branch_protection_rules.values():
if rule.id == id:
return rule
for commit in repo._commits.values():
if commit.id == id:
return commit
for suite in commit._check_suites.values():
if suite.id == id:
return suite
def getGrapheneSchema():

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python
# Copyright 2018 Red Hat, Inc.
# Copyright 2024 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@@ -516,13 +515,23 @@ class FakeApp:
self.slug = slug
class FakeCheckRun(object):
def __init__(self, id, name, details_url, output, status, conclusion,
class FakeCheckSuite:
_graphene_type = 'CheckSuite'
def __init__(self):
self.id = str(uuid.uuid4())
self.runs = []
class FakeCheckRun:
_graphene_type = 'CheckRun'
def __init__(self, name, details_url, output, status, conclusion,
completed_at, external_id, actions, app):
if actions is None:
actions = []
self.id = id
self.id = str(uuid.uuid4())
self.name = name
self.details_url = details_url
self.output = output
@@ -585,11 +594,14 @@ class FakeCombinedStatus(object):
self.statuses = statuses
class FakeCommit(object):
class FakeCommit:
_graphene_type = 'Commit'
def __init__(self, sha):
self._statuses = []
self.sha = sha
self._check_runs = []
self._check_suites = defaultdict(FakeCheckSuite)
self.id = str(uuid.uuid4())
def set_status(self, state, url, description, context, user):
status = FakeStatus(
@@ -598,10 +610,9 @@ class FakeCommit(object):
# the last status provided for a commit.
self._statuses.insert(0, status)
def set_check_run(self, id, name, details_url, output, status, conclusion,
def set_check_run(self, name, details_url, output, status, conclusion,
completed_at, external_id, actions, app):
check_run = FakeCheckRun(
id,
name,
details_url,
output,
@@ -614,7 +625,8 @@ class FakeCommit(object):
)
# Always insert a check_run to the front of the list to represent the
# last check_run provided for a commit.
self._check_runs.insert(0, check_run)
check_suite = self._check_suites[app]
check_suite.runs.insert(0, check_run)
return check_run
def get_url(self, path, params=None):
@@ -622,7 +634,7 @@ class FakeCommit(object):
statuses = [s.as_dict() for s in self._statuses]
return FakeResponse(statuses)
if path == "check-runs":
check_runs = [c.as_dict() for c in self._check_runs]
check_runs = [c.as_dict() for c in self.check_runs()]
resp = {"total_count": len(check_runs), "check_runs": check_runs}
return FakeResponse(resp)
@@ -630,7 +642,10 @@ class FakeCommit(object):
return self._statuses
def check_runs(self):
return self._check_runs
check_runs = []
for suite in self._check_suites.values():
check_runs.extend(suite.runs)
return check_runs
def status(self):
'''
@@ -653,7 +668,6 @@ class FakeRepository(object):
self._commits = {}
self.data = data
self.name = name
self.check_run_counter = 0
# Simple dictionary to store permission values per feature (e.g.
# checks, Repository contents, Pull requests, Commit statuses, ...).
@@ -691,7 +705,6 @@ class FakeRepository(object):
return
rule = self._branch_protection_rules[branch_name]
rule.id = str(uuid.uuid4())
rule.pattern = branch_name
rule.required_contexts = contexts or []
rule.require_reviews = require_review
@@ -749,9 +762,7 @@ class FakeRepository(object):
if commit is None:
commit = FakeCommit(head_sha)
self._commits[head_sha] = commit
self.check_run_counter += 1
commit.set_check_run(
str(self.check_run_counter),
name,
details_url,
output,
@@ -1112,9 +1123,7 @@ class FakeGithubSession(object):
if commit is None:
commit = FakeCommit(head_sha)
repo._commits[head_sha] = commit
repo.check_run_counter += 1
check_run = commit.set_check_run(
str(repo.check_run_counter),
json['name'],
json['details_url'],
json['output'],
@@ -1170,7 +1179,7 @@ class FakeGithubSession(object):
check_runs = [
check_run
for commit in repo._commits.values()
for check_run in commit._check_runs
for check_run in commit.check_runs()
if check_run.id == check_run_id
]
check_run = check_runs[0]
@@ -1202,8 +1211,10 @@ class FakeGithubSession(object):
class FakeBranchProtectionRule:
_graphene_type = 'BranchProtectionRule'
def __init__(self):
self.id = str(uuid.uuid4())
self.pattern = None
self.required_contexts = []
self.require_reviews = False

View File

@@ -1,4 +1,5 @@
# Copyright 2015 GoodData
# Copyright 2024 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@@ -28,7 +29,7 @@ import git
import gitdb
import github3.exceptions
from tests.fakegithub import FakeFile, FakeGithubEnterpriseClient
from tests.fakegithub import FakeFile
from zuul.driver.github.githubconnection import GithubShaCache
from zuul.zk.layout import LayoutState
from zuul.lib import strings
@@ -2686,48 +2687,6 @@ class TestGithubDriverEnterprise(ZuulGithubAppTestCase):
self.assertEqual(len(A.comments), 0)
class TestGithubDriverEnterpriseLegacy(ZuulGithubAppTestCase):
config_file = 'zuul-github-driver-enterprise.conf'
scheduler_count = 1
def setUp(self):
self.old_version = FakeGithubEnterpriseClient.version
FakeGithubEnterpriseClient.version = '2.19.0'
super().setUp()
def tearDown(self):
super().tearDown()
FakeGithubEnterpriseClient.version = self.old_version
@simple_layout('layouts/merging-github.yaml', driver='github')
def test_report_pull_merge(self):
github = self.fake_github.getGithubClient()
repo = github.repo_from_project('org/project')
repo._set_branch_protection(
'master', require_review=True)
# pipeline merges the pull request on success
A = self.fake_github.openFakePullRequest('org/project', 'master',
'PR title',
body='I shouldnt be seen',
body_text='PR body')
self.fake_github.emitEvent(A.getCommentAddedEvent('merge me'))
self.waitUntilSettled()
# Note: PR was not approved but old github does not support
# reviewDecision so this gets ignored and zuul merges nevertheless
self.assertTrue(A.is_merged)
self.assertThat(A.merge_message,
MatchesRegex(r'.*PR title\n\nPR body.*', re.DOTALL))
self.assertThat(A.merge_message,
Not(MatchesRegex(
r'.*I shouldnt be seen.*',
re.DOTALL)))
self.assertEqual(len(A.comments), 0)
class TestGithubDriverEnterpriseCache(ZuulGithubAppTestCase):
config_file = 'zuul-github-driver-enterprise.conf'
scheduler_count = 1
@@ -2931,3 +2890,40 @@ class TestGithubGraphQL(ZuulTestCase):
self.fake_github.graphql_client.fetch_branch_protection(
github, project))
self.assertEqual(num_rules * (1 + num_extra_branches), len(branches))
@simple_layout('layouts/basic-github.yaml', driver='github')
def test_graphql_query_canmerge(self):
project = self.fake_github.getProject('org/project')
github = self.fake_github.getGithubClient(project.name)
repo = github.repo_from_project('org/project')
num_rules = 101
for branch_no in range(num_rules):
repo._set_branch_protection(f'branch{branch_no}',
protected=True,
locked=True)
repo._set_branch_protection('master',
protected=True,
locked=True)
A = self.fake_github.openFakePullRequest('org/project', 'master', 'A')
num_apps = 102
num_runs = 103
for app_no in range(num_apps):
for run_no in range(num_runs):
repo.create_check_run(
A.head_sha,
f'run{run_no}',
conclusion='failure',
app=f'app{app_no}',
)
conn = self.scheds.first.sched.connections.connections["github"]
change_key = ChangeKey(conn.connection_name, "org/project",
"PullRequest", str(A.number), str(A.head_sha))
change = conn.getChange(change_key, refresh=True)
result = self.fake_github.graphql_client.fetch_canmerge(
github, change)
self.assertTrue(result['protected'])
# We should have one run from each app; the duplicate runs are
# dropped by github.
self.assertEqual(num_apps + 1, len(result['checks'].keys()))

View File

@@ -38,7 +38,9 @@ class GraphQLClient:
self.log.debug('Loading prepared graphql queries')
query_names = [
'canmerge',
'canmerge-legacy',
'canmerge-page-rules',
'canmerge-page-suites',
'canmerge-page-runs',
'branch-protection',
'branch-protection-inner',
]
@@ -66,43 +68,58 @@ class GraphQLClient:
query_name, response)
return response
def _fetch_canmerge(self, log, github, owner, repo, pull, sha):
if github.version and github.version[:2] < (2, 21):
# Github Enterprise prior to 2.21 doesn't offer the review decision
# so don't request it as this will result in an error.
query = 'canmerge-legacy'
else:
# Since GitHub Enterprise 2.21 and on github.com we can request the
# review decision state of the pull request.
query = 'canmerge'
return self._run_query(log, github, query,
owner=owner,
repo=repo,
pull=pull,
head_sha=sha)
def fetch_canmerge(self, github, change, zuul_event_id=None):
log = get_annotated_logger(self.log, zuul_event_id)
owner, repo = change.project.name.split('/')
data = self._fetch_canmerge(log, github, owner, repo, change.number,
change.patchset)
result = {}
cursor = None
matching_rules = []
repository = nested_get(data, 'data', 'repository')
# Find corresponding rule to our branch
rules = nested_get(repository, 'branchProtectionRules', 'nodes',
default=[])
while True:
if cursor is None:
# First page of branchProtectionRules
data = self._run_query(log, github, 'canmerge',
owner=owner,
repo=repo,
pull=change.number,
head_sha=change.patchset,
ref_name=change.branch,
)
repository = nested_get(data, 'data', 'repository')
else:
# Subsequent pages of branchProtectionRules
data = self._run_query(log, github, 'canmerge-page-rules',
owner=owner,
repo=repo,
cursor=cursor,
ref_name=change.branch,
)
# Find corresponding rule to our branch
rules = nested_get(
data, 'data', 'repository', 'branchProtectionRules', 'nodes',
default=[])
# Filter branch protection rules for the one matching the change.
# matchingRefs should either be empty or only contain our branch
matching_rules.extend([
rule for rule in rules
for ref in nested_get(
rule, 'matchingRefs', 'nodes', default=[])
if ref.get('name') == change.branch
])
rules_pageinfo = nested_get(
data, 'data', 'repository',
'branchProtectionRules', 'pageInfo')
if not rules_pageinfo['hasNextPage']:
break
cursor = rules_pageinfo['endCursor']
# Filter branch protection rules for the one matching the change.
matching_rules = [
rule for rule in rules
for ref in nested_get(rule, 'matchingRefs', 'nodes', default=[])
if ref.get('name') == change.branch
]
if len(matching_rules) > 1:
log.warn('More than one branch protection rules match change %s',
change)
log.warn('More than one branch protection rule '
'matches change %s', change)
return result
elif len(matching_rules) == 1:
matching_rule = matching_rules[0]
@@ -138,7 +155,7 @@ class GraphQLClient:
# Add status checks
result['status'] = {}
commit = nested_get(data, 'data', 'repository', 'object')
commit = nested_get(repository, 'object')
# Status can be explicit None so make sure we work with a dict
# afterwards
status = commit.get('status') or {}
@@ -147,8 +164,8 @@ class GraphQLClient:
# Add check runs
result['checks'] = {}
for suite in nested_get(commit, 'checkSuites', 'nodes', default=[]):
for run in nested_get(suite, 'checkRuns', 'nodes', default=[]):
for suite in self._fetch_canmerge_suites(github, log, commit):
for run in self._fetch_canmerge_runs(github, log, suite):
result['checks'][run['name']] = {
**run,
"app": suite.get("app")
@@ -156,6 +173,35 @@ class GraphQLClient:
return result
def _fetch_canmerge_suites(self, github, log, commit):
commit_id = commit['id']
while True:
for suite in nested_get(
commit, 'checkSuites', 'nodes', default=[]):
yield suite
page_info = nested_get(commit, 'checkSuites', 'pageInfo')
if not page_info['hasNextPage']:
return
data = self._run_query(
log, github, 'canmerge-page-suites',
commit_node_id=commit_id,
cursor=page_info['endCursor'])
commit = nested_get(data, 'data', 'node')
def _fetch_canmerge_runs(self, github, log, suite):
suite_id = suite['id']
while True:
for run in nested_get(suite, 'checkRuns', 'nodes', default=[]):
yield run
page_info = nested_get(suite, 'checkRuns', 'pageInfo')
if not page_info['hasNextPage']:
return
data = self._run_query(
log, github, 'canmerge-page-runs',
suite_node_id=suite_id,
cursor=page_info['endCursor'])
suite = nested_get(data, 'data', 'node')
def _fetch_branch_protection(self, log, github, project,
zuul_event_id=None):
owner, repo = project.name.split('/')

View File

@@ -1,52 +0,0 @@
query canMergeData(
$owner: String!
$repo: String!
$pull: Int!
$head_sha: String!
) {
repository(owner: $owner, name: $repo) {
branchProtectionRules(first: 100) {
nodes {
pattern
requiredStatusCheckContexts
requiresApprovingReviews
requiresCodeOwnerReviews
matchingRefs(first: 100) {
nodes {
name
}
}
}
}
pullRequest(number: $pull) {
isDraft
}
object(expression: $head_sha) {
... on Commit {
checkSuites(first: 100) {
nodes {
app {
name
slug
}
checkRuns(first: 100) {
nodes {
name
conclusion
}
}
}
}
status {
contexts {
creator {
login
}
state
context
}
}
}
}
}
}

View File

@@ -0,0 +1,26 @@
query canMergeDataPageRules(
$owner: String!
$repo: String!
$cursor: String
$ref_name: String!
) {
repository(owner: $owner, name: $repo) {
branchProtectionRules(first: 100, after: $cursor) {
pageInfo {
endCursor
hasNextPage
}
nodes {
pattern
requiredStatusCheckContexts
requiresApprovingReviews
requiresCodeOwnerReviews
matchingRefs(first: 100, query: $ref_name) {
nodes {
name
}
}
}
}
}
}

View File

@@ -0,0 +1,19 @@
query canMergeDataPageRuns(
$suite_node_id: ID!
$cursor: String
) {
node(id: $suite_node_id) {
... on CheckSuite {
checkRuns(first: 100, after: $cursor) {
pageInfo {
endCursor
hasNextPage
}
nodes {
name
conclusion
}
}
}
}
}

View File

@@ -0,0 +1,41 @@
query canMergeDataPageSuites(
$commit_node_id: ID!
$cursor: String
) {
node(id: $commit_node_id) {
... on Commit {
checkSuites(first: 100, after: $cursor) {
pageInfo {
endCursor
hasNextPage
}
nodes {
id
app {
name
slug
}
checkRuns(first: 100) {
pageInfo {
endCursor
hasNextPage
}
nodes {
name
conclusion
}
}
}
}
status {
contexts {
creator {
login
}
state
context
}
}
}
}
}

View File

@@ -3,15 +3,20 @@ query canMergeData(
$repo: String!
$pull: Int!
$head_sha: String!
$ref_name: String!
) {
repository(owner: $owner, name: $repo) {
branchProtectionRules(first: 100) {
pageInfo {
endCursor
hasNextPage
}
nodes {
pattern
requiredStatusCheckContexts
requiresApprovingReviews
requiresCodeOwnerReviews
matchingRefs(first: 100) {
matchingRefs(first: 100, query: $ref_name) {
nodes {
name
}
@@ -25,13 +30,23 @@ query canMergeData(
}
object(expression: $head_sha) {
... on Commit {
id
checkSuites(first: 100) {
pageInfo {
endCursor
hasNextPage
}
nodes {
id
app {
name
slug
}
checkRuns(first: 100) {
pageInfo {
endCursor
hasNextPage
}
nodes {
name
conclusion