4c972f00bd
The canMerge check is executed whenever zuul tests if a change can enter a gate pipeline. This is part of the critical path in the event handling of the scheduler and therefore must be as fast as possible. Currently this takes five requests for doing its work and also transfers large amounts of data that is unneeded: * get pull request * get branch protection settings * get commits * get status of latest commit * get check runs of latest commit Especially when Github is busy this can slow down zuul's event processing considerably. This can be optimized using graphql to only query the data we need with a single request. This reduces requests and load on Github and speeds up event processing in the scheduler. Since this is the first usage of graphql this also sets up needed testing infrastructure using graphene to mock the github api with real test data. Change-Id: I77be4f16cf7eb5c8035ce0312f792f4e8d4c3e10
669 lines
21 KiB
Python
669 lines
21 KiB
Python
#!/usr/bin/env python
|
|
|
|
# Copyright 2018 Red Hat, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
from collections import defaultdict
|
|
|
|
import github3.exceptions
|
|
import re
|
|
import time
|
|
|
|
import graphene
|
|
from requests import HTTPError
|
|
from requests.structures import CaseInsensitiveDict
|
|
|
|
from tests.fake_graphql import FakeGithubQuery
|
|
|
|
FAKE_BASE_URL = 'https://example.com/api/v3/'
|
|
|
|
|
|
class FakeUser(object):
|
|
def __init__(self, login):
|
|
self.login = login
|
|
self.name = "Github User"
|
|
self.email = "github.user@example.com"
|
|
self.html_url = 'https://example.com/%s' % login
|
|
|
|
|
|
class FakeBranch(object):
|
|
def __init__(self, fake_repo, branch='master', protected=False):
|
|
self.name = branch
|
|
self._fake_repo = fake_repo
|
|
|
|
@property
|
|
def protected(self):
|
|
return self.name in self._fake_repo._branch_protection_rules
|
|
|
|
def as_dict(self):
|
|
return {
|
|
'name': self.name,
|
|
'protected': self.protected
|
|
}
|
|
|
|
|
|
class FakeStatus(object):
|
|
def __init__(self, state, url, description, context, user):
|
|
self.state = state
|
|
self.context = context
|
|
self._url = url
|
|
self._description = description
|
|
self._user = user
|
|
|
|
def as_dict(self):
|
|
return {
|
|
'state': self.state,
|
|
'url': self._url,
|
|
'description': self._description,
|
|
'context': self.context,
|
|
'creator': {
|
|
'login': self._user
|
|
}
|
|
}
|
|
|
|
|
|
class FakeCheckRun(object):
|
|
def __init__(self, name, details_url, output, status, conclusion,
|
|
completed_at, app):
|
|
self.name = name
|
|
self.details_url = details_url
|
|
self.output = output
|
|
self.conclusion = conclusion
|
|
self.completed_at = completed_at
|
|
self.app = app
|
|
|
|
# Github automatically sets the status to "completed" if a conclusion
|
|
# is provided.
|
|
if conclusion is not None:
|
|
self.status = "completed"
|
|
else:
|
|
self.status = status
|
|
|
|
def as_dict(self):
|
|
return {
|
|
"name": self.name,
|
|
"status": self.status,
|
|
"output": self.output,
|
|
"details_url": self.details_url,
|
|
"conclusion": self.conclusion,
|
|
"completed_at": self.completed_at,
|
|
"app": {
|
|
"slug": self.app,
|
|
},
|
|
}
|
|
|
|
def update(self, conclusion, completed_at, output, details_url):
|
|
self.conclusion = conclusion
|
|
self.completed_at = completed_at
|
|
self.output = output
|
|
self.details_url = details_url
|
|
|
|
# As we are only calling the update method when a build is completed,
|
|
# we can always set the status to "completed".
|
|
self.status = "completed"
|
|
|
|
|
|
class FakeGHReview(object):
|
|
|
|
def __init__(self, data):
|
|
self.data = data
|
|
|
|
def as_dict(self):
|
|
return self.data
|
|
|
|
|
|
class FakeCombinedStatus(object):
|
|
def __init__(self, sha, statuses):
|
|
self.sha = sha
|
|
self.statuses = statuses
|
|
|
|
|
|
class FakeCommit(object):
|
|
def __init__(self, sha):
|
|
self._statuses = []
|
|
self.sha = sha
|
|
self._check_runs = []
|
|
|
|
def set_status(self, state, url, description, context, user):
|
|
status = FakeStatus(
|
|
state, url, description, context, user)
|
|
# always insert a status to the front of the list, to represent
|
|
# the last status provided for a commit.
|
|
self._statuses.insert(0, status)
|
|
|
|
def set_check_run(self, name, details_url, output, status, conclusion,
|
|
completed_at, app):
|
|
check_run = FakeCheckRun(
|
|
name, details_url, output, status, conclusion, completed_at, app)
|
|
# Always insert a check_run to the front of the list to represent the
|
|
# last check_run provided for a commit.
|
|
self._check_runs.insert(0, check_run)
|
|
|
|
def get_url(self, path, params=None):
|
|
if path == 'statuses':
|
|
statuses = [s.as_dict() for s in self._statuses]
|
|
return FakeResponse(statuses)
|
|
if path == "check-runs":
|
|
check_runs = [c.as_dict() for c in self._check_runs]
|
|
resp = {"total_count": len(check_runs), "check_runs": check_runs}
|
|
return FakeResponse(resp)
|
|
|
|
def statuses(self):
|
|
return self._statuses
|
|
|
|
def check_runs(self):
|
|
return self._check_runs
|
|
|
|
def status(self):
|
|
'''
|
|
Returns the combined status wich only contains the latest statuses of
|
|
the commit together with some other information that we don't need
|
|
here.
|
|
'''
|
|
latest_statuses_by_context = {}
|
|
for status in self._statuses:
|
|
if status.context not in latest_statuses_by_context:
|
|
latest_statuses_by_context[status.context] = status
|
|
combined_statuses = latest_statuses_by_context.values()
|
|
return FakeCombinedStatus(self.sha, combined_statuses)
|
|
|
|
|
|
class FakeRepository(object):
|
|
def __init__(self, name, data):
|
|
self._api = FAKE_BASE_URL
|
|
self._branches = [FakeBranch(self)]
|
|
self._commits = {}
|
|
self.data = data
|
|
self.name = name
|
|
|
|
# Simple dictionary to store permission values per feature (e.g.
|
|
# checks, Repository contents, Pull requests, Commit statuses, ...).
|
|
# Could be used to just enable/deable a permission (True, False) or
|
|
# provide more specific values like "read" or "read&write". The mocked
|
|
# functionality in the FakeRepository class should then check for this
|
|
# value and raise an appropriate exception like a production Github
|
|
# would do in case the permission is not sufficient or missing at all.
|
|
self._permissions = {}
|
|
|
|
# List of branch protection rules
|
|
self._branch_protection_rules = defaultdict(FakeBranchProtectionRule)
|
|
|
|
# fail the next commit requests with 404
|
|
self.fail_not_found = 0
|
|
|
|
def branches(self, protected=False):
|
|
if protected:
|
|
# simulate there is no protected branch
|
|
return [b for b in self._branches if b.protected]
|
|
return self._branches
|
|
|
|
def _set_branch_protection(self, branch_name, protected=True,
|
|
contexts=None):
|
|
if not protected and branch_name in self._branch_protection_rules:
|
|
del self._branch_protection_rules[branch_name]
|
|
return
|
|
|
|
rule = self._branch_protection_rules[branch_name]
|
|
rule.pattern = branch_name
|
|
rule.required_contexts = contexts or []
|
|
|
|
def _set_permission(self, key, value):
|
|
# NOTE (felix): Currently, this is only used to mock a repo with
|
|
# missing checks API permissions. But we could also use it to test
|
|
# arbitrary permission values like missing write, but only read
|
|
# permissions for a specific functionality.
|
|
self._permissions[key] = value
|
|
|
|
def _build_url(self, *args, **kwargs):
|
|
path_args = ['repos', self.name]
|
|
path_args.extend(args)
|
|
fakepath = '/'.join(path_args)
|
|
return FAKE_BASE_URL + fakepath
|
|
|
|
def _get(self, url, headers=None):
|
|
client = FakeGithubClient(self.data)
|
|
return client.session.get(url, headers)
|
|
|
|
def _create_branch(self, branch):
|
|
self._branches.append((FakeBranch(self, branch=branch)))
|
|
|
|
def _delete_branch(self, branch_name):
|
|
self._branches = [b for b in self._branches if b.name != branch_name]
|
|
|
|
def create_status(self, sha, state, url, description, context,
|
|
user='zuul'):
|
|
# Since we're bypassing github API, which would require a user, we
|
|
# default the user as 'zuul' here.
|
|
commit = self._commits.get(sha, None)
|
|
if commit is None:
|
|
commit = FakeCommit(sha)
|
|
self._commits[sha] = commit
|
|
commit.set_status(state, url, description, context, user)
|
|
|
|
def create_check_run(self, head_sha, name, details_url=None, output=None,
|
|
status=None, conclusion=None, completed_at=None,
|
|
app="zuul"):
|
|
|
|
# Raise the appropriate github3 exception in case we don't have
|
|
# permission to access the checks API
|
|
if self._permissions.get("checks") is False:
|
|
# To create a proper github3 exception, we need to mock a response
|
|
# object
|
|
raise github3.exceptions.ForbiddenError(
|
|
FakeResponse("Resource not accessible by integration", 403)
|
|
)
|
|
|
|
commit = self._commits.get(head_sha, None)
|
|
if commit is None:
|
|
commit = FakeCommit(head_sha)
|
|
self._commits[head_sha] = commit
|
|
commit.set_check_run(
|
|
name, details_url, output, status, conclusion, completed_at, app)
|
|
|
|
def commit(self, sha):
|
|
|
|
if self.fail_not_found > 0:
|
|
self.fail_not_found -= 1
|
|
|
|
class Response:
|
|
status_code = 0
|
|
message = ''
|
|
|
|
def json(self):
|
|
return {
|
|
'message': self.message
|
|
}
|
|
|
|
resp = Response()
|
|
resp.status_code = 404
|
|
resp.message = 'Not Found'
|
|
|
|
raise github3.exceptions.NotFoundError(resp)
|
|
|
|
commit = self._commits.get(sha, None)
|
|
if commit is None:
|
|
commit = FakeCommit(sha)
|
|
self._commits[sha] = commit
|
|
return commit
|
|
|
|
def get_url(self, path, params=None):
|
|
if '/' in path:
|
|
entity, request = path.split('/', 1)
|
|
else:
|
|
entity = path
|
|
request = None
|
|
|
|
if entity == 'branches':
|
|
return self.get_url_branches(request, params=params)
|
|
if entity == 'collaborators':
|
|
return self.get_url_collaborators(request)
|
|
if entity == 'commits':
|
|
return self.get_url_commits(request, params=params)
|
|
else:
|
|
return None
|
|
|
|
def get_url_branches(self, path, params=None):
|
|
if path is None:
|
|
# request wants a branch list
|
|
return self.get_url_branch_list(params)
|
|
|
|
elements = path.split('/')
|
|
|
|
entity = elements[-1]
|
|
if entity == 'protection':
|
|
branch = '/'.join(elements[0:-1])
|
|
return self.get_url_protection(branch)
|
|
else:
|
|
# fall back to treat all elements as branch
|
|
branch = '/'.join(elements)
|
|
return self.get_url_branch(branch)
|
|
|
|
def get_url_commits(self, path, params=None):
|
|
if '/' in path:
|
|
sha, request = path.split('/', 1)
|
|
else:
|
|
sha = path
|
|
request = None
|
|
commit = self._commits.get(sha)
|
|
|
|
# Commits are created lazy so check if there is a PR with the correct
|
|
# head sha.
|
|
if commit is None:
|
|
pull_requests = [pr for pr in self.data.pull_requests.values()
|
|
if pr.head_sha == sha]
|
|
if pull_requests:
|
|
commit = FakeCommit(sha)
|
|
self._commits[sha] = commit
|
|
|
|
if not commit:
|
|
return FakeResponse({}, 404)
|
|
|
|
return commit.get_url(request, params=params)
|
|
|
|
def get_url_branch_list(self, params):
|
|
if params.get('protected') == 1:
|
|
exclude_unprotected = True
|
|
else:
|
|
exclude_unprotected = False
|
|
branches = [x.as_dict() for x in self.branches(exclude_unprotected)]
|
|
|
|
return FakeResponse(branches, 200)
|
|
|
|
def get_url_branch(self, branch_name):
|
|
for branch in self._branches:
|
|
if branch.name == branch_name:
|
|
return FakeResponse(branch.as_dict())
|
|
return FakeResponse(None, 404)
|
|
|
|
def get_url_collaborators(self, path):
|
|
login, entity = path.split('/')
|
|
|
|
if entity == 'permission':
|
|
owner, proj = self.name.split('/')
|
|
permission = None
|
|
for pr in self.data.pull_requests.values():
|
|
pr_owner, pr_project = pr.project.split('/')
|
|
if (pr_owner == owner and proj == pr_project):
|
|
if login in pr.admins:
|
|
permission = 'admin'
|
|
break
|
|
elif login in pr.writers:
|
|
permission = 'write'
|
|
break
|
|
else:
|
|
permission = 'read'
|
|
data = {
|
|
'permission': permission,
|
|
}
|
|
return FakeResponse(data)
|
|
else:
|
|
return None
|
|
|
|
def get_url_protection(self, branch):
|
|
rule = self._branch_protection_rules.get(branch)
|
|
|
|
if not rule:
|
|
# Note that GitHub returns 404 if branch protection is off so do
|
|
# the same here as well
|
|
return FakeResponse({}, 404)
|
|
data = {
|
|
'required_status_checks': {
|
|
'contexts': rule.required_contexts
|
|
}
|
|
}
|
|
return FakeResponse(data)
|
|
|
|
def pull_requests(self, state=None, sort=None, direction=None):
|
|
# sort and direction are unused currently, but present to match
|
|
# real world call signatures.
|
|
pulls = []
|
|
for pull in self.data.pull_requests.values():
|
|
if pull.project != self.name:
|
|
continue
|
|
if state and pull.state != state:
|
|
continue
|
|
pulls.append(FakePull(pull))
|
|
return pulls
|
|
|
|
|
|
class FakeIssue(object):
|
|
def __init__(self, fake_pull_request):
|
|
self._fake_pull_request = fake_pull_request
|
|
|
|
def pull_request(self):
|
|
return FakePull(self._fake_pull_request)
|
|
|
|
@property
|
|
def number(self):
|
|
return self._fake_pull_request.number
|
|
|
|
|
|
class FakeFile(object):
|
|
def __init__(self, filename):
|
|
self.filename = filename
|
|
|
|
|
|
class FakePull(object):
|
|
def __init__(self, fake_pull_request):
|
|
self._fake_pull_request = fake_pull_request
|
|
|
|
def issue(self):
|
|
return FakeIssue(self._fake_pull_request)
|
|
|
|
def files(self):
|
|
# Github lists max. 300 files of a PR in alphabetical order
|
|
return [FakeFile(fn)
|
|
for fn in sorted(self._fake_pull_request.files)][:300]
|
|
|
|
def reviews(self):
|
|
return self._fake_pull_request.reviews
|
|
|
|
def create_review(self, body, commit_id, event):
|
|
review = FakeGHReview({
|
|
'state': event,
|
|
'user': {
|
|
'login': 'fakezuul',
|
|
'email': 'fakezuul@fake.test',
|
|
},
|
|
'submitted_at': time.gmtime(),
|
|
})
|
|
self._fake_pull_request.reviews.append(review)
|
|
return review
|
|
|
|
@property
|
|
def head(self):
|
|
client = FakeGithubClient(self._fake_pull_request.github.github_data)
|
|
repo = client.repo_from_project(self._fake_pull_request.project)
|
|
return repo.commit(self._fake_pull_request.head_sha)
|
|
|
|
def commits(self):
|
|
# since we don't know all commits of a pr we just return here a list
|
|
# with the head_sha as the only commit
|
|
return [self.head]
|
|
|
|
def as_dict(self):
|
|
pr = self._fake_pull_request
|
|
connection = pr.github
|
|
data = {
|
|
'number': pr.number,
|
|
'title': pr.subject,
|
|
'url': 'https://%s/api/v3/%s/pulls/%s' % (
|
|
connection.server, pr.project, pr.number
|
|
),
|
|
'html_url': 'https://%s/%s/pull/%s' % (
|
|
connection.server, pr.project, pr.number
|
|
),
|
|
'updated_at': pr.updated_at,
|
|
'base': {
|
|
'repo': {
|
|
'full_name': pr.project
|
|
},
|
|
'ref': pr.branch,
|
|
},
|
|
'draft': pr.draft,
|
|
'mergeable': True,
|
|
'state': pr.state,
|
|
'head': {
|
|
'sha': pr.head_sha,
|
|
'ref': pr.getPRReference(),
|
|
'repo': {
|
|
'full_name': pr.project
|
|
}
|
|
},
|
|
'merged': pr.is_merged,
|
|
'body': pr.body,
|
|
'changed_files': len(pr.files),
|
|
'labels': [{'name': l} for l in pr.labels]
|
|
}
|
|
return data
|
|
|
|
|
|
class FakeIssueSearchResult(object):
|
|
def __init__(self, issue):
|
|
self.issue = issue
|
|
|
|
|
|
class FakeResponse(object):
|
|
def __init__(self, data, status_code=200):
|
|
self.status_code = status_code
|
|
self.data = data
|
|
self.links = {}
|
|
|
|
@property
|
|
def content(self):
|
|
# Building github3 exceptions requires a Response object with the
|
|
# content attribute set.
|
|
return self.data
|
|
|
|
def json(self):
|
|
return self.data
|
|
|
|
def raise_for_status(self):
|
|
if 400 <= self.status_code < 600:
|
|
raise HTTPError('{} Something went wrong'.format(self.status_code),
|
|
response=self)
|
|
|
|
|
|
class FakeGithubSession(object):
|
|
|
|
def __init__(self, data):
|
|
self._data = data
|
|
self.headers = CaseInsensitiveDict()
|
|
self._base_url = None
|
|
self.schema = graphene.Schema(query=FakeGithubQuery)
|
|
|
|
def build_url(self, *args):
|
|
fakepath = '/'.join(args)
|
|
return FAKE_BASE_URL + fakepath
|
|
|
|
def get(self, url, headers=None, params=None):
|
|
request = url
|
|
if request.startswith(FAKE_BASE_URL):
|
|
request = request[len(FAKE_BASE_URL):]
|
|
|
|
entity, request = request.split('/', 1)
|
|
|
|
if entity == 'repos':
|
|
return self.get_repo(request, params=params)
|
|
else:
|
|
# unknown entity to process
|
|
return None
|
|
|
|
def post(self, url, data=None, headers=None, params=None, json=None):
|
|
|
|
if json and json.get('query'):
|
|
query = json.get('query')
|
|
variables = json.get('variables')
|
|
result = self.schema.execute(
|
|
query, variables=variables, context=self._data)
|
|
return FakeResponse({'data': result.data}, 200)
|
|
|
|
return FakeResponse(None, 404)
|
|
|
|
def get_repo(self, request, params=None):
|
|
org, project, request = request.split('/', 2)
|
|
project_name = '{}/{}'.format(org, project)
|
|
|
|
client = FakeGithubClient(self._data)
|
|
repo = client.repo_from_project(project_name)
|
|
|
|
return repo.get_url(request, params=params)
|
|
|
|
|
|
class FakeBranchProtectionRule:
|
|
|
|
def __init__(self):
|
|
self.pattern = None
|
|
self.required_contexts = []
|
|
self.require_reviews = False
|
|
self.require_codeowners_review = False
|
|
|
|
|
|
class FakeGithubData(object):
|
|
def __init__(self, pull_requests):
|
|
self.pull_requests = pull_requests
|
|
self.repos = {}
|
|
|
|
|
|
class FakeGithubClient(object):
|
|
def __init__(self, data, inst_id=None):
|
|
self._data = data
|
|
self._inst_id = inst_id
|
|
self.session = FakeGithubSession(data)
|
|
|
|
def user(self, login):
|
|
return FakeUser(login)
|
|
|
|
def repository(self, owner, proj):
|
|
return self._data.repos.get((owner, proj), None)
|
|
|
|
def repo_from_project(self, project):
|
|
# This is a convenience method for the tests.
|
|
owner, proj = project.split('/')
|
|
return self.repository(owner, proj)
|
|
|
|
def addProject(self, project):
|
|
owner, proj = project.name.split('/')
|
|
self._data.repos[(owner, proj)] = FakeRepository(
|
|
project.name, self._data)
|
|
|
|
def addProjectByName(self, project_name):
|
|
owner, proj = project_name.split('/')
|
|
self._data.repos[(owner, proj)] = FakeRepository(
|
|
project_name, self._data)
|
|
|
|
def pull_request(self, owner, project, number):
|
|
fake_pr = self._data.pull_requests[int(number)]
|
|
return FakePull(fake_pr)
|
|
|
|
def search_issues(self, query):
|
|
def tokenize(s):
|
|
# Tokenize with handling for quoted substrings.
|
|
# Bit hacky and needs PDA, but our current inputs are
|
|
# constrained enough that this should work.
|
|
s = s[:-len(" type:pr is:open in:body")]
|
|
OR_split = [x.strip() for x in s.split('OR')]
|
|
tokens = [x.strip('"') for x in OR_split]
|
|
return tokens
|
|
|
|
def query_is_sha(s):
|
|
return re.match(r'[a-z0-9]{40}', s)
|
|
|
|
if query_is_sha(query):
|
|
return (FakeIssueSearchResult(FakeIssue(pr))
|
|
for pr in self._data.pull_requests.values()
|
|
if pr.head_sha == query)
|
|
|
|
# Non-SHA queries are of the form:
|
|
#
|
|
# '"Depends-On: <url>" OR "Depends-On: <url>"
|
|
# OR ... type:pr is:open in:body'
|
|
#
|
|
# For the tests is currently enough to simply check for the
|
|
# existence of the Depends-On strings in the PR body.
|
|
tokens = tokenize(query)
|
|
terms = set(tokens)
|
|
results = []
|
|
for pr in self._data.pull_requests.values():
|
|
if not pr.body:
|
|
body = ""
|
|
else:
|
|
body = pr.body
|
|
for term in terms:
|
|
if term in body:
|
|
issue = FakeIssue(pr)
|
|
results.append(FakeIssueSearchResult(issue))
|
|
break
|
|
|
|
return iter(results)
|