Merge "Optimize canMerge using graphql"

Zuul 2020-03-06 18:08:03 +00:00 committed by Gerrit Code Review
commit 80bdb338c1
8 changed files with 376 additions and 56 deletions

View File

@@ -2,6 +2,7 @@ include AUTHORS
include ChangeLog
include zuul/ansible/*/*
include zuul/ansible/*/*/*
include zuul/driver/github/graphql/*
include zuul/lib/ansible-config.conf
include zuul/web/static/*
include zuul/web/static/static/*/*

View File

@@ -31,6 +31,7 @@ paho-mqtt
cherrypy
ws4py
routes
pathspec
jsonpath-rw
urllib3!=1.25.4,!=1.25.5 # https://github.com/urllib3/urllib3/pull/1684
# TODO(tobiash): cheroot 8.1.0 introduced a regression when handling concurrent

View File

@@ -5,3 +5,4 @@ testtools>=0.9.32
PyMySQL
psycopg2-binary
beautifulsoup4
graphene

tests/fake_graphql.py (new file, 166 lines)
View File

@@ -0,0 +1,166 @@
# Copyright 2019 BMW Group
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from graphene import Boolean, Field, Int, List, ObjectType, String


class FakePageInfo(ObjectType):
    end_cursor = String()
    has_next_page = Boolean()

    def resolve_end_cursor(parent, info):
        return 'testcursor'

    def resolve_has_next_page(parent, info):
        return False


class FakeBranchProtectionRule(ObjectType):
    pattern = String()
    requiredStatusCheckContexts = List(String)
    requiresApprovingReviews = Boolean()
    requiresCodeOwnerReviews = Boolean()

    def resolve_pattern(parent, info):
        return parent.pattern

    def resolve_requiredStatusCheckContexts(parent, info):
        return parent.required_contexts

    def resolve_requiresApprovingReviews(parent, info):
        return parent.require_reviews

    def resolve_requiresCodeOwnerReviews(parent, info):
        return parent.require_codeowners_review


class FakeBranchProtectionRules(ObjectType):
    nodes = List(FakeBranchProtectionRule)

    def resolve_nodes(parent, info):
        return parent.values()


class FakeStatusContext(ObjectType):
    state = String()
    context = String()

    def resolve_state(parent, info):
        state = parent.state.upper()
        return state

    def resolve_context(parent, info):
        return parent.context


class FakeStatus(ObjectType):
    contexts = List(FakeStatusContext)

    def resolve_contexts(parent, info):
        return parent


class FakeCheckRun(ObjectType):
    name = String()
    conclusion = String()

    def resolve_name(parent, info):
        return parent.name

    def resolve_conclusion(parent, info):
        return parent.conclusion.upper()


class FakeCheckRuns(ObjectType):
    nodes = List(FakeCheckRun)

    def resolve_nodes(parent, info):
        return parent


class FakeCheckSuite(ObjectType):
    checkRuns = Field(FakeCheckRuns, first=Int())

    def resolve_checkRuns(parent, info, first=None):
        return parent


class FakeCheckSuites(ObjectType):
    nodes = List(FakeCheckSuite)

    def resolve_nodes(parent, info):
        # Note: we only use a single check suite in the tests so return a
        # single item to keep it simple.
        return [parent]


class FakeCommit(ObjectType):
    class Meta:
        # The GraphQL object type name defaults to the class name, but we
        # need it to be 'Commit' here.
        name = 'Commit'

    status = Field(FakeStatus)
    checkSuites = Field(FakeCheckSuites, first=Int())

    def resolve_status(parent, info):
        seen = set()
        result = []
        for status in parent._statuses:
            if status.context not in seen:
                seen.add(status.context)
                result.append(status)
        # Github returns None if there are no results
        return result or None

    def resolve_checkSuites(parent, info, first=None):
        # Tests only utilize one check suite so return all runs for that.
        return parent._check_runs


class FakePullRequest(ObjectType):
    isDraft = Boolean()

    def resolve_isDraft(parent, info):
        return parent.draft


class FakeRepository(ObjectType):
    name = String()
    branchProtectionRules = Field(FakeBranchProtectionRules, first=Int())
    pullRequest = Field(FakePullRequest, number=Int(required=True))
    object = Field(FakeCommit, expression=String(required=True))

    def resolve_name(parent, info):
        org, name = parent.name.split('/')
        return name

    def resolve_branchProtectionRules(parent, info, first):
        return parent._branch_protection_rules

    def resolve_pullRequest(parent, info, number):
        return parent.data.pull_requests.get(number)

    def resolve_object(parent, info, expression):
        return parent._commits.get(expression)


class FakeGithubQuery(ObjectType):
    repository = Field(FakeRepository, owner=String(required=True),
                       name=String(required=True))

    def resolve_repository(root, info, owner, name):
        return info.context.repos.get((owner, name))
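
For illustration, a minimal sketch of how the fake schema above can be executed directly with graphene, mirroring the call FakeGithubSession.post makes later in this change; canmerge_query and fake_data are placeholders for the prepared query text and the FakeGithubData instance the tests use, not names from this commit:

# Hedged sketch, not part of the change: run a query against the fake schema
# the same way FakeGithubSession.post does. `canmerge_query` and `fake_data`
# are placeholders.
import graphene
from tests.fake_graphql import FakeGithubQuery

schema = graphene.Schema(query=FakeGithubQuery)
result = schema.execute(
    canmerge_query,
    variables={'owner': 'org', 'repo': 'project', 'pull': 1,
               'head_sha': 'abc123'},
    context=fake_data)
print(result.data)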

View File

@@ -19,9 +19,12 @@ import github3.exceptions
import re
import time
import graphene
from requests import HTTPError
from requests.structures import CaseInsensitiveDict
from tests.fake_graphql import FakeGithubQuery
FAKE_BASE_URL = 'https://example.com/api/v3/'
@@ -537,6 +540,8 @@ class FakeGithubSession(object):
    def __init__(self, data):
        self._data = data
        self.headers = CaseInsensitiveDict()
        self._base_url = None
        self.schema = graphene.Schema(query=FakeGithubQuery)

    def build_url(self, *args):
        fakepath = '/'.join(args)
@@ -555,6 +560,17 @@
        # unknown entity to process
        return None

    def post(self, url, data=None, headers=None, params=None, json=None):
        if json and json.get('query'):
            query = json.get('query')
            variables = json.get('variables')
            result = self.schema.execute(
                query, variables=variables, context=self._data)
            return FakeResponse({'data': result.data}, 200)
        return FakeResponse(None, 404)

    def get_repo(self, request, params=None):
        org, project, request = request.split('/', 2)
        project_name = '{}/{}'.format(org, project)
@@ -570,6 +586,8 @@ class FakeBranchProtectionRule:
    def __init__(self):
        self.pattern = None
        self.required_contexts = []
        self.require_reviews = False
        self.require_codeowners_review = False


class FakeGithubData(object):

View File

@@ -23,6 +23,7 @@ import threading
import time
import re
import json
from collections import OrderedDict
import cherrypy
import cachecontrol
@@ -34,9 +35,11 @@ import jwt
import requests
import github3
import github3.exceptions
import github3.pulls
from github3.session import AppInstallationTokenAuth
from zuul.connection import BaseConnection
from zuul.driver.github.graphql import GraphQLClient
from zuul.lib.gearworker import ZuulGearWorker
from zuul.web.handler import BaseWebController
from zuul.lib.logutil import get_annotated_logger
@@ -47,6 +50,7 @@ from zuul.driver.github.githubmodel import PullRequest, GithubTriggerEvent
GITHUB_BASE_URL = 'https://api.github.com'
PREVIEW_JSON_ACCEPT = 'application/vnd.github.machine-man-preview+json'
PREVIEW_DRAFT_ACCEPT = 'application/vnd.github.shadow-cat-preview+json'
PREVIEW_CHECKS_ACCEPT = 'application/vnd.github.antiope-preview+json'


def _sign_request(body, secret):
@@ -78,10 +82,18 @@ class GithubRequestLogger:
        self.log = get_annotated_logger(log, zuul_event_id)

    def log_request(self, response, *args, **kwargs):
        self.log.debug('%s %s result: %s, size: %s, duration: %s',
                       response.request.method, response.url,
                       response.status_code, len(response.content),
                       int(response.elapsed.microseconds / 1000))
        fields = OrderedDict()
        fields['result'] = response.status_code
        fields['size'] = len(response.content)
        fields['duration'] = int(response.elapsed.microseconds / 1000)
        if response.url.endswith('/graphql'):
            body = json.loads(response.request.body)
            for key, value in body.get('variables', {}).items():
                fields[key] = value
        info = ', '.join(['%s: %s' % (key, value)
                          for key, value in fields.items()])
        self.log.debug('%s %s %s',
                       response.request.method, response.url, info)


class GithubRateLimitHandler:
@@ -775,8 +787,10 @@ class GithubConnection(BaseConnection):
        self._log_rate_limit = False

        if self.server == 'github.com':
            self.api_base_url = GITHUB_BASE_URL
            self.base_url = GITHUB_BASE_URL
        else:
            self.api_base_url = 'https://%s/api' % self.server
            self.base_url = 'https://%s/api/v3' % self.server

        # ssl verification must default to true
@@ -822,6 +836,8 @@
r"^Depends-On: https://%s/.+/.+/pull/[0-9]+$" % self.server,
re.MULTILINE | re.IGNORECASE)
self.graphql_client = GraphQLClient('%s/graphql' % self.api_base_url)
def toDict(self):
d = super().toDict()
d.update({
@@ -1520,31 +1536,24 @@
        github = self.getGithubClient(change.project.name, zuul_event_id=event)
        # Append accept header so we get the draft status
        # Append accept headers so we get the draft status and checks api
        self._append_accept_header(github, PREVIEW_DRAFT_ACCEPT)
        self._append_accept_header(github, PREVIEW_CHECKS_ACCEPT)
        owner, proj = change.project.name.split('/')
        pull = github.pull_request(owner, proj, change.number)
        # For performance reasons fetch all needed data for canMerge upfront
        # using a single graphql call.
        canmerge_data = self.graphql_client.fetch_canmerge(github, change)
        # If the PR is a draft it cannot be merged.
        # TODO: This uses the dict instead of the pull object since github3.py
        # has no support for draft PRs yet. Replace this with pull.draft when
        # support has been added.
        # https://github.com/sigmavirus24/github3.py/issues/926
        if pull.as_dict().get('draft', False):
        if canmerge_data.get('isDraft', False):
            log.debug('Change %s can not merge because it is a draft', change)
            return False
        protection = self._getBranchProtection(
            change.project.name, change.branch, zuul_event_id=event)
        if not self._hasRequiredStatusChecks(allow_needs, protection, pull):
        if not self._hasRequiredStatusChecks(allow_needs, canmerge_data):
            return False
        required_reviews = protection.get(
            'required_pull_request_reviews')
        if required_reviews:
            if required_reviews.get('require_code_owner_reviews'):
        if canmerge_data.get('requiresApprovingReviews'):
            if canmerge_data.get('requiresCodeOwnerReviews'):
                # we need to process the reviews using code owners
                # TODO(tobiash): not implemented yet
                pass
@@ -1659,14 +1668,9 @@
        return resp.json()

    def _hasRequiredStatusChecks(self, allow_needs, protection, pull):
        if not protection:
            # There are no protection settings -> ok by definition
            return True
        required_contexts = protection.get(
            'required_status_checks', {}).get('contexts')

    @staticmethod
    def _hasRequiredStatusChecks(allow_needs, canmerge_data):
        required_contexts = canmerge_data['requiredStatusCheckContexts']
        if not required_contexts:
            # There are no required contexts -> ok by definition
            return True
@@ -1675,33 +1679,9 @@
        required_contexts = set(
            [x for x in required_contexts if x not in allow_needs])

        # NOTE(tobiash): We cannot just take the last commit in the list
        # because it is not sorted that the head is the last one in every case.
        # E.g. when doing a re-merge from the target the PR head can be
        # somewhere in the middle of the commit list. Thus we need to search
        # the whole commit list for the PR head commit which has the statuses
        # attached.
        commits = list(pull.commits())
        commit = None
        for c in commits:
            if c.sha == pull.head.sha:
                commit = c
                break

        # Get successful statuses
        successful = set([s.context for s in commit.status().statuses
                          if s.state == 'success'])
        if self.app_id:
            try:
                # Required contexts can be fulfilled by statuses or check runs.
                successful.update([cr.name for cr in commit.check_runs()
                                   if cr.conclusion == 'success'])
            except github3.exceptions.GitHubException as exc:
                self.log.error(
                    "Unable to retrieve check runs for commit %s: %s",
                    commit.sha, str(exc)
                )
        successful = set([s[0] for s in canmerge_data['status'].items()
                          if s[1] == 'SUCCESS'])

        # Required contexts must be a subset of the successful contexts as
        # we allow additional successful status contexts we don't care about.
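
For orientation, a hedged sketch of the dict _hasRequiredStatusChecks now receives (the values are invented for the example) and the subset check it performs:

# Illustrative only: a canmerge_data dict as assembled by
# GraphQLClient.fetch_canmerge; the values are made up.
canmerge_data = {
    'isDraft': False,
    'requiredStatusCheckContexts': ['tox/py38'],
    'requiresApprovingReviews': True,
    'requiresCodeOwnerReviews': False,
    'status': {'tox/py38': 'SUCCESS', 'extra-check': 'SUCCESS'},
}

required = set(canmerge_data['requiredStatusCheckContexts'])
successful = {context for context, state in canmerge_data['status'].items()
              if state == 'SUCCESS'}
# Extra successful contexts are ignored; every required context must be
# among the successful ones.
assert required.issubset(successful)
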
@@ -1811,11 +1791,11 @@
        github = self.getGithubClient(
            project_name, zuul_event_id=zuul_event_id
        )
        self._append_accept_header(github, PREVIEW_CHECKS_ACCEPT)
        url = github.session.build_url(
            "repos", project_name, "commits", sha, "check-runs")
        headers = {'Accept': 'application/vnd.github.antiope-preview+json'}
        params = {"per_page": 100}
        resp = github.session.get(url, params=params, headers=headers)
        resp = github.session.get(url, params=params)
        resp.raise_for_status()
        log.debug("Got commit checks for sha %s on %s", sha, project_name)

View File

@@ -0,0 +1,113 @@
# Copyright 2020 BMW Group
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging

import pathspec
from pkg_resources import resource_string


def nested_get(d, *keys, default=None):
    temp = d
    for key in keys[:-1]:
        temp = temp.get(key, {}) if temp is not None else None
    return temp.get(keys[-1], default) if temp is not None else default


class GraphQLClient:
    log = logging.getLogger('zuul.github.graphql')

    def __init__(self, url):
        self.url = url
        self.queries = {}
        self._load_queries()

    def _load_queries(self):
        self.log.debug('Loading prepared graphql queries')
        query_names = [
            'canmerge',
        ]
        for query_name in query_names:
            self.queries[query_name] = resource_string(
                __name__, '%s.graphql' % query_name).decode('utf-8')

    @staticmethod
    def _prepare_query(query, variables):
        data = {
            'query': query,
            'variables': variables,
        }
        return data

    def _fetch_canmerge(self, github, owner, repo, pull, sha):
        variables = {
            'zuul_query': 'canmerge',  # used for logging
            'owner': owner,
            'repo': repo,
            'pull': pull,
            'head_sha': sha,
        }
        query = self._prepare_query(self.queries['canmerge'], variables)
        response = github.session.post(self.url, json=query)
        return response.json()

    def fetch_canmerge(self, github, change):
        owner, repo = change.project.name.split('/')
        data = self._fetch_canmerge(github, owner, repo, change.number,
                                    change.patchset)
        result = {}

        repository = nested_get(data, 'data', 'repository')
        # Find the protection rule that corresponds to our branch
        rules = nested_get(repository, 'branchProtectionRules', 'nodes',
                           default=[])
        matching_rule = None
        for rule in rules:
            pattern = pathspec.patterns.GitWildMatchPattern(
                rule.get('pattern'))
            match = pathspec.match_files([pattern], [change.branch])
            if match:
                matching_rule = rule
                break

        # If there is a matching rule, get required status checks
        if matching_rule:
            result['requiredStatusCheckContexts'] = matching_rule.get(
                'requiredStatusCheckContexts', [])
            result['requiresApprovingReviews'] = matching_rule.get(
                'requiresApprovingReviews')
            result['requiresCodeOwnerReviews'] = matching_rule.get(
                'requiresCodeOwnerReviews')
        else:
            result['requiredStatusCheckContexts'] = []

        # Check for draft
        pull_request = nested_get(repository, 'pullRequest')
        result['isDraft'] = nested_get(pull_request, 'isDraft', default=False)

        # Add status checks
        result['status'] = {}
        commit = nested_get(data, 'data', 'repository', 'object')
        # Status can be an explicit None, so make sure we work with a dict
        # afterwards.
        status = commit.get('status') or {}
        for context in status.get('contexts', []):
            result['status'][context['context']] = context['state']

        # Add check runs
        for suite in nested_get(commit, 'checkSuites', 'nodes', default=[]):
            for run in nested_get(suite, 'checkRuns', 'nodes', default=[]):
                result['status'][run['name']] = run['conclusion']

        return result
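
As a quick illustration of the nested_get helper above (the data is invented for the example):

# Hedged illustration of nested_get on a dict shaped like a GraphQL response.
data = {'data': {'repository': {'pullRequest': {'isDraft': True}}}}

nested_get(data, 'data', 'repository', 'pullRequest', 'isDraft')
# -> True
nested_get(data, 'data', 'repository', 'object', default={})
# -> {} (a missing key falls back to the default)
nested_get({'data': None}, 'data', 'repository')
# -> None (an explicit null along the path also yields the default)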

View File

@@ -0,0 +1,40 @@
query canMergeData(
  $owner: String!
  $repo: String!
  $pull: Int!
  $head_sha: String!
) {
  repository(owner: $owner, name: $repo) {
    branchProtectionRules(first: 100) {
      nodes {
        pattern
        requiredStatusCheckContexts
        requiresApprovingReviews
        requiresCodeOwnerReviews
      }
    }
    pullRequest(number: $pull) {
      isDraft
    }
    object(expression: $head_sha) {
      ... on Commit {
        checkSuites(first: 100) {
          nodes {
            checkRuns(first: 100) {
              nodes {
                name
                conclusion
              }
            }
          }
        }
        status {
          contexts {
            state
            context
          }
        }
      }
    }
  }
}
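
For reference, a hedged sketch (values invented, not captured from a real API call) of the response shape this query yields, which GraphQLClient.fetch_canmerge traverses with nested_get:

# Illustrative response shape only; field names follow the query above.
response = {
    'data': {
        'repository': {
            'branchProtectionRules': {'nodes': [
                {'pattern': 'master',
                 'requiredStatusCheckContexts': ['tox/py38'],
                 'requiresApprovingReviews': True,
                 'requiresCodeOwnerReviews': False},
            ]},
            'pullRequest': {'isDraft': False},
            'object': {
                'checkSuites': {'nodes': [
                    {'checkRuns': {'nodes': [
                        {'name': 'build', 'conclusion': 'SUCCESS'},
                    ]}},
                ]},
                'status': {'contexts': [
                    {'state': 'SUCCESS', 'context': 'tox/py38'},
                ]},
            },
        },
    },
}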