The Gatekeeper, or a project gating system
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1621 lines
62 KiB

# Copyright 2015 Hewlett-Packard Development Company, L.P.
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections
import collections.abc
import concurrent.futures
import datetime
import hashlib
import hmac
import json
import logging
import queue
import re
import threading
import time
import traceback

import cachecontrol
from cachecontrol.cache import DictCache
from cachecontrol.heuristics import BaseHeuristic
import cachetools
import cherrypy
import gear
import github3
import github3.exceptions
import iso8601
import jwt
import requests

from zuul.connection import BaseConnection
from zuul.web.handler import BaseWebController
from zuul.lib.config import get_default
from zuul.model import Ref, Branch, Tag, Project
from zuul.exceptions import MergeFailure
from zuul.driver.github.githubmodel import PullRequest, GithubTriggerEvent
PREVIEW_JSON_ACCEPT = 'application/vnd.github.machine-man-preview+json'
def _sign_request(body, secret):
    """Return the HMAC-SHA1 signature of a webhook payload.

    :param bytes body: raw request body to sign.
    :param str secret: shared secret; encoded as UTF-8 before use.
    :returns: the signature formatted as ``'sha1=<hexdigest>'``.
    """
    digest = hmac.new(secret.encode('utf-8'), body, hashlib.sha1).hexdigest()
    return 'sha1=' + digest
class UTC(datetime.tzinfo):
    """Fixed-offset tzinfo representing UTC (zero offset, no DST)."""

    def utcoffset(self, dt):
        return datetime.timedelta(0)

    def tzname(self, dt):
        return "UTC"

    def dst(self, dt):
        return datetime.timedelta(0)


# Shared instance used to build timezone-aware datetimes.
utc = UTC()
class GithubShaCache(object):
    """Per-project cache mapping a commit sha to pull request numbers."""

    def __init__(self):
        # project name -> (sha -> set of PR numbers)
        self.projects = {}

    def update(self, project_name, pr):
        """Record the head sha and merge commit sha of a pull request.

        :param str project_name: key of the per-project cache.
        :param dict pr: pull request data; reads ``pr['head']['sha']``,
            ``pr['number']`` and the optional ``merge_commit_sha``.
        """
        project_cache = self.projects.get(project_name)
        if project_cache is None:
            # Cache up to 4k shas for each project
            # Note we cache the actual sha for a PR and the
            # merge_commit_sha so we make this fairly large.
            project_cache = cachetools.LRUCache(4096)
            self.projects[project_name] = project_cache

        number = pr['number']
        project_cache.setdefault(pr['head']['sha'], set()).add(number)

        merge_commit_sha = pr.get('merge_commit_sha')
        if merge_commit_sha:
            project_cache.setdefault(merge_commit_sha, set()).add(number)

    def get(self, project_name, sha):
        """Return the set of PR numbers cached for ``sha`` (empty if none)."""
        project_cache = self.projects.get(project_name, {})
        return project_cache.get(sha, set())
class GithubGearmanWorker(object):
    """A thread that answers gearman requests"""

    log = logging.getLogger("zuul.GithubGearmanWorker")

    def __init__(self, connection):
        self.config = connection.sched.config
        self.connection = connection
        self.thread = threading.Thread(target=self._run,
                                       name='github-gearman-worker')
        self._running = False
        handler = "github:%s:payload" % self.connection.connection_name
        # gearman function name -> callable taking the decoded arguments
        self.jobs = {
            handler: self.handle_payload,
        }

    def _run(self):
        """Fetch gearman jobs and dispatch them until stopped."""
        while self._running:
            try:
                job = self.gearman.getJob()
                try:
                    if job.name not in self.jobs:
                        self.log.exception("Exception while running job")
                        job.sendWorkException(traceback.format_exc())
                        continue
                    output = self.jobs[job.name](json.loads(job.arguments))
                    job.sendWorkComplete(json.dumps(output))
                except Exception:
                    self.log.exception("Exception while running job")
                    job.sendWorkException(traceback.format_exc())
            except gear.InterruptedError:
                # Raised when waiting for a job is interrupted; the loop
                # condition decides whether to continue.
                pass
            except Exception:
                self.log.exception("Exception while getting job")

    def handle_payload(self, args):
        """Queue a webhook payload and report the outcome.

        :param dict args: must contain ``headers`` and ``body``.
        :returns: ``{'return_code': 200}`` on success, 503 on any error.
        """
        headers = args.get("headers")
        body = args.get("body")

        delivery = headers.get('x-github-delivery')
        self.log.debug("Github Webhook Received: {delivery}".format(
            delivery=delivery))

        # TODO(jlk): Validate project in the request is a project we know

        try:
            self.__dispatch_event(body, headers)
            output = {'return_code': 200}
        except Exception:
            output = {'return_code': 503}
            self.log.exception("Exception handling Github event:")

        return output

    def __dispatch_event(self, body, headers):
        """Hand the event to the connection's event queue.

        :raises Exception: if the event header is missing or queuing fails.
        """
        try:
            event = headers['x-github-event']
            self.log.debug("X-Github-Event: " + event)
        except KeyError:
            self.log.debug("Request headers missing the X-Github-Event.")
            raise Exception('Please specify a X-Github-Event header.')

        delivery = headers.get('x-github-delivery')
        try:
            self.connection.addEvent(body, event, delivery)
        except Exception:
            message = 'Exception deserializing JSON body'
            # Keep the original traceback in the log; the re-raised
            # exception only carries the generic message.
            self.log.exception(message)
            # TODO(jlk): Raise this as something different?
            raise Exception(message)

    def start(self):
        """Connect to gearman, register the handlers and start the thread."""
        self._running = True
        server = self.config.get('gearman', 'server')
        port = get_default(self.config, 'gearman', 'port', 4730)
        ssl_key = get_default(self.config, 'gearman', 'ssl_key')
        ssl_cert = get_default(self.config, 'gearman', 'ssl_cert')
        ssl_ca = get_default(self.config, 'gearman', 'ssl_ca')
        self.gearman = gear.TextWorker('Zuul Github Connector')
        self.log.debug("Connect to gearman")
        self.gearman.addServer(server, port, ssl_key, ssl_cert, ssl_ca,
                               keepalive=True, tcp_keepidle=60,
                               tcp_keepintvl=30, tcp_keepcnt=5)
        self.log.debug("Waiting for server")
        self.gearman.waitForServer()
        self.log.debug("Registering")
        for job in self.jobs:
            self.gearman.registerFunction(job)
        self.thread.start()

    def stop(self):
        """Stop the worker loop and shut the gearman client down."""
        self._running = False
        self.gearman.stopWaitingForJobs()
        # We join here to avoid whitelisting the thread -- if it takes more
        # than 5s to stop in tests, there's a problem.
        self.thread.join(timeout=5)
        self.gearman.shutdown()
class GithubEventLogAdapter(logging.LoggerAdapter):
    """Logger adapter that prefixes messages with the delivery id."""

    def process(self, msg, kwargs):
        """Prefix ``msg`` with the ``delivery`` value from the extra dict."""
        msg, kwargs = super(GithubEventLogAdapter, self).process(msg, kwargs)
        msg = '[delivery: %s] %s' % (kwargs['extra']['delivery'], msg)
        return msg, kwargs
class GithubEventProcessor(object):
    """Turns one queued webhook payload into a trigger event."""

    # GitHub pull_request webhook action -> trigger event action.
    _PULL_REQUEST_ACTIONS = {
        'opened': 'opened',
        'synchronize': 'changed',
        'closed': 'closed',
        'reopened': 'reopened',
        'labeled': 'labeled',
        'unlabeled': 'unlabeled',
        'edited': 'edited',
    }

    def __init__(self, connector, event_tuple):
        """
        :param connector: owning connector; its ``connection`` and
            ``_stopped`` attributes are read.
        :param event_tuple: ``(timestamp, body, event_type, delivery)``.
        """
        self.connector = connector
        self.connection = connector.connection
        self.ts, self.body, self.event_type, self.delivery = event_tuple
        logger = logging.getLogger("zuul.GithubEventConnector")
        self.log = GithubEventLogAdapter(logger, {'delivery': self.delivery})

    def run(self):
        """Process the event, logging start and end."""
        self.log.debug("Starting event processing, queue length %s",
                       self.connection.getEventQueueSize())
        try:
            self._handle_event()
        finally:
            self.log.debug("Finished event processing")

    def _handle_event(self):
        """Dispatch to ``_event_<type>`` and forward the resulting event."""
        if self.connector._stopped:
            return

        # If there's any installation mapping information in the body then
        # update the project mapping before any requests are made.
        installation_id = self.body.get('installation', {}).get('id')
        project_name = self.body.get('repository', {}).get('full_name')

        if installation_id and project_name:
            old_id = self.connection.installation_map.get(project_name)

            if old_id and old_id != installation_id:
                msg = "Unexpected installation_id change for %s. %d -> %d."
                self.log.warning(msg, project_name, old_id, installation_id)

            self.connection.installation_map[project_name] = installation_id

        try:
            method = getattr(self, '_event_' + self.event_type)
        except AttributeError:
            # TODO(jlk): Gracefully handle event types we don't care about
            # instead of logging an exception.
            message = "Unhandled X-Github-Event: {0}".format(self.event_type)
            self.log.debug(message)
            # Returns empty on unhandled events
            return

        self.log.debug("Handling %s event", self.event_type)

        try:
            event = method()
        except Exception:
            self.log.exception('Exception when handling event:')
            event = None

        if event:
            event.delivery = self.delivery
            project = self.connection.source.getProject(event.project_name)
            if event.change_number:
                self.connection._getChange(project,
                                           event.change_number,
                                           event.patch_number,
                                           refresh=True)
                self.log.debug("Refreshed change %s,%s",
                               event.change_number, event.patch_number)

            # If this event references a branch and we're excluding unprotected
            # branches, we might need to check whether the branch is now
            # protected.
            if event.branch:
                b = self.connection.getBranch(project.name, event.branch)
                if b is not None:
                    branch_protected = b.get('protected')
                    # NOTE(review): helper names in this method were
                    # restored from a damaged source -- confirm them
                    # against the connection class.
                    self.connection.checkBranchCache(
                        project, event.branch, branch_protected, self.log)
                    event.branch_protected = branch_protected
                else:
                    # This can happen if the branch was deleted in GitHub. In
                    # this case we assume that the branch COULD have been
                    # protected before. The cache update is handled by the
                    # push event, so we don't touch the cache here again.
                    event.branch_protected = True

            event.project_hostname = self.connection.canonical_hostname
            self.connection.logEvent(event)
            self.connection.sched.addEvent(event)

    def _event_push(self):
        """Build a trigger event from a ``push`` payload."""
        base_repo = self.body.get('repository')

        event = GithubTriggerEvent()
        event.trigger_name = 'github'
        event.project_name = base_repo.get('full_name')
        event.type = 'push'
        event.branch_updated = True

        event.ref = self.body.get('ref')
        event.oldrev = self.body.get('before')
        event.newrev = self.body.get('after')
        event.commits = self.body.get('commits')

        ref_parts = event.ref.split('/', 2)  # ie, ['refs', 'heads', 'foo/bar']

        if ref_parts[1] == "heads":
            # necessary for the scheduler to match against particular branches
            event.branch = ref_parts[2]

        # This checks whether the event created or deleted a branch so
        # that Zuul may know to perform a reconfiguration on the
        # project.
        if event.oldrev == '0' * 40:
            event.branch_created = True
        if event.newrev == '0' * 40:
            event.branch_deleted = True

        if event.branch:
            project = self.connection.source.getProject(event.project_name)
            if event.branch_deleted:
                # We currently cannot determine if a deleted branch was
                # protected so we need to assume it was. GitHub doesn't allow
                # deletion of protected branches but we don't get a
                # notification about branch protection settings. Thus we don't
                # know if branch protection has been disabled before deletion
                # of the branch.
                # FIXME(tobiash): Find a way to handle that case
                self.connection._clearBranchCache(project, self.log)
            elif event.branch_created:
                # A new branch never can be protected because that needs to be
                # configured after it has been created.
                self.connection._clearBranchCache(project, self.log)

        return event

    def _event_pull_request(self):
        """Build a trigger event from a ``pull_request`` payload.

        :returns: the event, or None for actions that are not handled.
        """
        action = self.body.get('action')
        pr_body = self.body.get('pull_request')

        event = self._pull_request_to_event(pr_body)
        event.account = self._get_sender(self.body)
        event.type = 'pull_request'

        mapped = self._PULL_REQUEST_ACTIONS.get(action)
        if mapped is None:
            return None
        event.action = mapped
        if action in ('labeled', 'unlabeled'):
            event.label = self.body['label']['name']
        return event

    def _event_issue_comment(self):
        """Handles pull request comments"""
        action = self.body.get('action')
        if action != 'created':
            return
        if not self.body.get('issue', {}).get('pull_request'):
            # Do not process non-PR issue comment
            return
        pr_body = self._issue_to_pull_request(self.body)
        if pr_body is None:
            return

        event = self._pull_request_to_event(pr_body)
        event.account = self._get_sender(self.body)
        event.comment = self.body.get('comment').get('body')
        event.type = 'pull_request'
        event.action = 'comment'
        return event

    def _event_pull_request_review(self):
        """Handles pull request reviews"""
        pr_body = self.body.get('pull_request')
        if pr_body is None:
            return

        review = self.body.get('review')
        if review is None:
            return

        event = self._pull_request_to_event(pr_body)
        event.state = review.get('state')
        event.account = self._get_sender(self.body)
        event.type = 'pull_request_review'
        event.action = self.body.get('action')
        return event

    def _event_status(self):
        """Build a trigger event from a commit ``status`` payload.

        :returns: the event, or None for pending statuses and for shas
            with no matching pull request.
        """
        action = self.body.get('action')
        if action == 'pending':
            return
        project = self.body.get('name')
        pr_body = self.connection.getPullBySha(
            self.body['sha'], project, self.log)
        if pr_body is None:
            return

        event = self._pull_request_to_event(pr_body)
        event.account = self._get_sender(self.body)
        event.type = 'pull_request'
        event.action = 'status'
        # Github API is silly. Webhook blob sets author data in
        # 'sender', but API call to get status puts it in 'creator'.
        # Duplicate the data so our code can look in one place
        self.body['creator'] = self.body['sender']
        event.status = "%s:%s:%s" % _status_as_tuple(self.body)
        return event

    def _issue_to_pull_request(self, body):
        """Fetch the pull request an issue payload refers to (or None)."""
        number = body.get('issue').get('number')
        project_name = body.get('repository').get('full_name')
        pr_body, pr_obj = self.connection.getPull(
            project_name, number, self.log)
        if pr_body is None:
            self.log.debug('Pull request #%s not found in project %s',
                           number, project_name)
        return pr_body

    def _pull_request_to_event(self, pr_body):
        """Create a trigger event populated from pull request data."""
        event = GithubTriggerEvent()
        event.trigger_name = 'github'

        base = pr_body.get('base')
        base_repo = base.get('repo')
        head = pr_body.get('head')

        event.project_name = base_repo.get('full_name')
        event.change_number = pr_body.get('number')
        event.change_url = self.connection.getPullUrl(event.project_name,
                                                      event.change_number)
        event.updated_at = pr_body.get('updated_at')
        event.branch = base.get('ref')
        event.ref = "refs/pull/" + str(pr_body.get('number')) + "/head"
        event.patch_number = head.get('sha')

        event.title = pr_body.get('title')

        return event

    def _get_sender(self, body):
        """Return the user for the payload's sender, or None without login."""
        login = body.get('sender').get('login')
        if login:
            # TODO(tobiash): it might be better to plumb in the installation id
            project = body.get('repository', {}).get('full_name')
            user = self.connection.getUser(login, project)
            self.log.debug("Got user %s", user)
            return user
class GithubEventConnector(threading.Thread):
    """Move events from GitHub into the scheduler"""

    log = logging.getLogger("zuul.GithubEventConnector")

    def __init__(self, connection):
        super(GithubEventConnector, self).__init__()
        self.daemon = True
        self.connection = connection
        self._stopped = False

    def stop(self):
        """Ask the thread to exit."""
        self._stopped = True
        # Queue an empty event so a blocked getEvent() call returns and
        # the loop can observe the stop flag.
        self.connection.addEvent(None)

    def run(self):
        """Process queued events one at a time until stopped."""
        while True:
            if self._stopped:
                return
            try:
                data = self.connection.getEvent()
                GithubEventProcessor(self, data).run()
            except Exception:
                self.log.exception("Exception moving GitHub event:")
            finally:
                self.connection.eventDone()
class GithubUser(collections.abc.Mapping):
    """Read-only mapping of GitHub user data, fetched on first access."""

    log = logging.getLogger('zuul.GithubUser')

    def __init__(self, username, connection, project):
        self._connection = connection
        self._username = username
        self._data = None
        self._project = project

    def __getitem__(self, key):
        self._init_data()
        return self._data[key]

    def __iter__(self):
        self._init_data()
        return iter(self._data)

    def __len__(self):
        self._init_data()
        return len(self._data)

    def _init_data(self):
        """Query GitHub for the user once and cache the result."""
        if self._data is None:
            github = self._connection.getGithubClient(self._project)
            user = github.user(self._username)
            self.log.debug("Initialized data for user %s", self._username)
            self._connection.log_rate_limit(self.log, github)
            self._data = {
                'username': user.login,
                'name': user.name,
                'email': user.email,
            }
class GithubConnection(BaseConnection):
driver_name = 'github'
log = logging.getLogger("zuul.GithubConnection")
payload_path = 'payload'
def __init__(self, driver, connection_name, connection_config):
    """Read the connection configuration and set up caches.

    Only local state is prepared here; no request is issued.
    """
    super(GithubConnection, self).__init__(
        driver, connection_name, connection_config)
    self._change_cache = {}
    self._project_branch_cache_include_unprotected = {}
    self._project_branch_cache_exclude_unprotected = {}
    self.projects = {}
    self.git_ssh_key = self.connection_config.get('sshkey')
    self.server = self.connection_config.get('server', '')
    self.canonical_hostname = self.connection_config.get(
        'canonical_hostname', self.server)
    self.source = driver.getSource(self)
    self.event_queue = queue.Queue()
    self._sha_pr_cache = GithubShaCache()

    # Logging of rate limit is optional as this does additional requests
    rate_limit_logging = self.connection_config.get(
        'rate_limit_logging', 'true')
    self._log_rate_limit = rate_limit_logging.lower() != 'false'

    if self.server == '':
        self.base_url = GITHUB_BASE_URL
    else:
        self.base_url = 'https://%s/api/v3' % self.server

    # ssl verification must default to true
    verify_ssl = self.connection_config.get('verify_ssl', 'true')
    self.verify_ssl = verify_ssl.lower() != 'false'

    self._github = None
    self.app_id = None
    self.app_key = None
    self.sched = None

    self.installation_map = {}
    self.installation_token_cache = {}

    # NOTE(jamielennox): Better here would be to cache to memcache or file
    # or something external - but zuul already sucks at restarting so in
    # memory probably doesn't make this much worse.

    # NOTE(tobiash): Unlike documented cachecontrol doesn't priorize
    # the etag caching but doesn't even re-request until max-age was
    # elapsed.
    #
    # Thus we need to add a custom caching heuristic which simply drops
    # the cache-control header containing max-age. This way we force
    # cachecontrol to only rely on the etag headers.
    class NoAgeHeuristic(BaseHeuristic):
        def update_headers(self, response):
            if 'cache-control' in response.headers:
                del response.headers['cache-control']

    # NOTE(review): adapter arguments and regex flags were restored from
    # a damaged source -- confirm against upstream.
    self.cache_adapter = cachecontrol.CacheControlAdapter(
        DictCache(),
        cache_etags=True,
        heuristic=NoAgeHeuristic())

    # The regex is based on the connection host. We do not yet support
    # cross-connection dependency gathering
    self.depends_on_re = re.compile(
        r"^Depends-On: https://%s/.+/.+/pull/[0-9]+$" % self.server,
        re.MULTILINE | re.IGNORECASE)
def onLoad(self):
    """Authenticate and start the event connector and gearman worker."""
    self.log.info('Starting GitHub connection: %s' % self.connection_name)
    self.gearman_worker = GithubGearmanWorker(self)
    self.log.info('Authing to GitHub')
    self._authenticateGithubAPI()
    self._prime_installation_map()
    self.log.info('Starting event connector')
    self._start_event_connector()
    self.log.info('Starting GearmanWorker')
    self.gearman_worker.start()
def onStop(self):
    """Stop the gearman worker and event connector, if they were started."""
    # TODO(jeblair): remove this check which is here only so that
    # zuul-web can call connections.stop to shut down the sql
    # connection.
    if hasattr(self, 'gearman_worker'):
        self.gearman_worker.stop()
        self._stop_event_connector()
def _start_event_connector(self):
    """Create the event connector thread and start it."""
    self.github_event_connector = GithubEventConnector(self)
    self.github_event_connector.start()
def _stop_event_connector(self):
    """Stop the event connector thread and wait for it to finish."""
    if self.github_event_connector:
        self.github_event_connector.stop()
        self.github_event_connector.join()
def _createGithubClient(self):
    """Return a new, unauthenticated github3 client using the HTTP cache."""
    if self.server != '':
        url = 'https://%s/' % self.server
        if not self.verify_ssl:
            # disabling ssl verification is evil so emit a warning
            self.log.warning("SSL verification disabled for "
                             "GitHub Enterprise")
        github = github3.GitHubEnterprise(url, verify=self.verify_ssl)
    else:
        github = github3.GitHub()

    # anything going through requests to http/s goes through cache
    github.session.mount('http://', self.cache_adapter)
    github.session.mount('https://', self.cache_adapter)
    # Add properties to store project and user for logging later
    github._zuul_project = None
    github._zuul_user_id = None
    return github
def _authenticateGithubAPI(self):
    """Create the shared client and load API token / app credentials.

    ``app_id`` and ``app_key`` are only stored when both are available.
    """
    config = self.connection_config

    api_token = config.get('api_token')

    app_id = config.get('app_id')
    app_key = None
    app_key_file = config.get('app_key')

    self._github = self._createGithubClient()

    if api_token:
        self._github.login(token=api_token)

    if app_key_file:
        try:
            with open(app_key_file, 'r') as f:
                app_key = f.read()
        except IOError:
            m = "Failed to open app key file for reading: %s"
            self.log.error(m, app_key_file)

    if (app_id or app_key) and not (app_id and app_key):
        self.log.warning("You must provide an app_id and "
                         "app_key to use installation based "
                         "authentication")
        return

    if app_id:
        self.app_id = int(app_id)
    if app_key:
        self.app_key = app_key
def _get_app_auth_headers(self):
    """Return request headers carrying a JWT signed with the app key.

    The token is valid for five minutes from now.
    """
    now = datetime.datetime.now(utc)
    expiry = now + datetime.timedelta(minutes=5)

    data = {'iat': now, 'exp': expiry, 'iss': self.app_id}
    app_token = jwt.encode(data, self.app_key, algorithm='RS256')
    # PyJWT < 2 returns bytes, PyJWT >= 2 returns str.
    if isinstance(app_token, bytes):
        app_token = app_token.decode('utf-8')

    headers = {'Accept': PREVIEW_JSON_ACCEPT,
               'Authorization': 'Bearer %s' % app_token}

    return headers
def _get_installation_key(self, project, user_id=None, inst_id=None,
                          reprime=False):
    """Return an installation access token, using the cache when valid.

    :param project: project name used to look up the installation id;
        when None, ``inst_id`` is used directly.
    :param user_id: optional user id sent with the token request.
    :param inst_id: explicit installation id.
    :param reprime: re-prime the installation map once if no id is found.
    :returns: the token, or ``''`` if no installation id is available.
    """
    installation_id = inst_id
    if project is not None:
        installation_id = self.installation_map.get(project)

    if not installation_id:
        if reprime:
            # prime installation map and try again without refreshing
            self._prime_installation_map()
            return self._get_installation_key(project,
                                              user_id=user_id,
                                              inst_id=inst_id,
                                              reprime=False)

        self.log.error("No installation ID available for project %s",
                       project)
        return ''

    now = datetime.datetime.now(utc)
    token, expiry = self.installation_token_cache.get(installation_id,
                                                      (None, None))

    if ((not expiry) or (not token) or (now >= expiry)):
        headers = self._get_app_auth_headers()

        url = "%s/installations/%s/access_tokens" % (self.base_url,
                                                     installation_id)

        json_data = {'user_id': user_id} if user_id else None

        response = requests.post(url, headers=headers, json=json_data)
        response.raise_for_status()

        data = response.json()

        expiry = iso8601.parse_date(data['expires_at'])
        # Refresh two minutes early so a token is never used at the very
        # end of its lifetime.
        expiry -= datetime.timedelta(minutes=2)
        token = data['token']

        self.installation_token_cache[installation_id] = (token, expiry)

    return token
def _get_repos_of_installation(self, inst_id, headers):
    """Return the full names of all repositories of an installation.

    Follows the ``next`` pagination links until exhausted.
    """
    url = '%s/installation/repositories?per_page=100' % self.base_url
    project_names = []
    while url:
        self.log.debug("Fetching repos for install %s" % inst_id)
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        repos = response.json()

        for repo in repos.get('repositories'):
            project_name = repo.get('full_name')
            project_names.append(project_name)

        # check if we need to do further paged calls
        url = response.links.get('next', {}).get('url')
    return project_names
def _prime_installation_map(self):
    """Walks each app install for the repos to prime install IDs"""
    if not self.app_id:
        return

    url = '%s/app/installations' % self.base_url
    installations = []
    headers = self._get_app_auth_headers()
    page = 1
    while url:
        self.log.debug("Fetching installations for GitHub app "
                       "(page %s)" % page)
        page += 1
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        installations.extend(response.json())

        # check if we need to do further paged calls
        url = response.links.get(
            'next', {}).get('url')

    headers_per_inst = {}
    with concurrent.futures.ThreadPoolExecutor() as executor:

        # Submit all token requests first, then collect the results.
        token_by_inst = {}
        for install in installations:
            inst_id = install.get('id')
            token_by_inst[inst_id] = executor.submit(
                self._get_installation_key, project=None, inst_id=inst_id)

        for inst_id, result in token_by_inst.items():
            token = result.result()
            headers_per_inst[inst_id] = {
                'Authorization': 'token %s' % token
            }

        project_names_by_inst = {}
        for install in installations:
            inst_id = install.get('id')
            headers = headers_per_inst[inst_id]

            project_names_by_inst[inst_id] = executor.submit(
                self._get_repos_of_installation, inst_id, headers)

        for inst_id, result in project_names_by_inst.items():
            project_names = result.result()
            for project_name in project_names:
                self.installation_map[project_name] = inst_id
def addEvent(self, data, event=None, delivery=None):
    """Queue ``(timestamp, data, event, delivery)`` for processing."""
    return self.event_queue.put((time.time(), data, event, delivery))
def getEvent(self):
    """Return the next queued event, blocking until one is available."""
    return self.event_queue.get()
def getEventQueueSize(self):
    """Return the approximate number of queued events."""
    return self.event_queue.qsize()
def eventDone(self):
    """Mark the most recently fetched event as processed."""
    self.event_queue.task_done()
def getGithubClient(self,
                    project=None,
                    user_id=None):
    """Return a github3 client suitable for ``project``.

    With an app id and a project, a new client authenticated with the
    installation token is returned; otherwise the shared client is used.
    """
    # if you're authenticating for a project and you're an integration then
    # you need to use the installation specific token.
    if project and self.app_id:
        github = self._createGithubClient()
        github.login(token=self._get_installation_key(project, user_id))
        github._zuul_project = project
        github._zuul_user_id = user_id
        return github

    # if we're using api_key authentication then this is already token
    # authenticated, if not then anonymous is the best we have.
    return self._github
def maintainCache(self, relevant):
    """Drop cached changes that are not in ``relevant``.

    The cache dict is modified in place.
    """
    # Collect first; deleting while iterating the dict would raise.
    remove = set()
    for key, change in self._change_cache.items():
        if change not in relevant:
            remove.add(key)
    for key in remove:
        del self._change_cache[key]
def getChange(self, event, refresh=False):
    """Get the change representing an event."""

    project = self.source.getProject(event.project_name)
    if event.change_number:
        change = self._getChange(project, event.change_number,
                                 event.patch_number, refresh=refresh)
        if hasattr(event, 'change_url') and event.change_url:
            change.url = event.change_url
        else:
            # The event has no change url so just construct it
            change.url = self.getPullUrl(
                event.project_name, event.change_number)
        change.uris = [
            '%s/%s/pull/%s' % (self.server, project, change.number),
        ]
        change.source_event = event
        change.is_current_patchset = (change.pr.get('head').get('sha') ==
                                      event.patch_number)
    else:
        # Not a pull request: model the ref as a tag, a branch or a
        # plain ref depending on its prefix.
        if event.ref and event.ref.startswith('refs/tags/'):
            change = Tag(project)
            change.tag = event.ref[len('refs/tags/'):]
        elif event.ref and event.ref.startswith('refs/heads/'):
            change = Branch(project)
            change.branch = event.ref[len('refs/heads/'):]
        else:
            change = Ref(project)
        change.ref = event.ref
        change.oldrev = event.oldrev
        change.newrev = event.newrev
        change.url = self.getGitwebUrl(project, sha=event.newrev)
        change.source_event = event
        if hasattr(event, 'commits'):
            change.files = self.getPushedFileNames(event)
    return change
def _getChange(self, project, number, patchset=None, refresh=False):
    """Return the cached change for a pull request, updating it if needed.

    A cached change is returned as-is unless ``refresh`` is set. If the
    update fails, the cache entry is removed and the exception re-raised.
    """
    key = (project.name, number, patchset)
    change = self._change_cache.get(key)
    if change and not refresh:
        return change
    if not change:
        change = PullRequest(project.name)
        change.project = project
        change.number = number
        change.patchset = patchset
    self._change_cache[key] = change
    try:
        self._updateChange(change)
    except Exception:
        # Do not keep a partially populated change in the cache.
        if key in self._change_cache:
            del self._change_cache[key]
        raise
    return change
def getChangesDependingOn(self, change, projects, tenant):
    """Return open pull requests whose body mentions one of change.uris.

    :param change: change whose ``uris`` are searched for.
    :param projects: projects to restrict the search to; if empty, all
        installations with a project in ``tenant`` are searched.
    :param tenant: used to filter projects when ``projects`` is empty.
    """
    changes = []
    if not change.uris:
        return changes

    # Get a list of projects with unique installation ids
    installation_ids = set()
    installation_projects = set()

    if projects:
        # We only need to find changes in projects in the supplied
        # ChangeQueue. Find all of the github installations for
        # all of those projects, and search using each of them, so
        # that if we get the right results based on the
        # permissions granted to each of the installations. The
        # common case for this is likely to be just one
        # installation -- change queues aren't likely to span more
        # than one installation.
        for project in projects:
            installation_id = self.installation_map.get(
                project.name)
            if installation_id not in installation_ids:
                installation_ids.add(installation_id)
                installation_projects.add(project.name)
    else:
        # We aren't in the context of a change queue and we just
        # need to query all installations of this tenant. This currently
        # only happens if certain features of the zuul trigger are
        # used; generally it should be avoided.
        for project_name, installation_id in self.installation_map.items():
            trusted, project = tenant.getProject(project_name)
            # ignore projects from different tenants
            if not project:
                continue
            if installation_id not in installation_ids:
                installation_ids.add(installation_id)
                installation_projects.add(project_name)

    keys = set()
    pattern = ' OR '.join(change.uris)
    query = '%s type:pr is:open in:body' % pattern
    # Repeat the search for each installation id (project)
    for installation_project in installation_projects:
        github = self.getGithubClient(installation_project)
        for issue in github.search_issues(query=query):
            pr = issue.issue.pull_request().as_dict()
            if not pr.get('url'):
                continue
            # the issue provides no good description of the project :\
            org, proj, _, num = pr.get('url').split('/')[-4:]
            proj = pr.get('base').get('repo').get('full_name')
            sha = pr.get('head').get('sha')
            key = (proj, num, sha)
            if key in keys:
                continue
            self.log.debug("Found PR %s/%s needs %s/%s" %
                           (proj, num, change.project.name,
                            change.number))
            keys.add(key)
        self.log.debug("Ran search issues: %s", query)
        self.log_rate_limit(self.log, github)

    for key in keys:
        (proj, num, sha) = key
        project = self.source.getProject(proj)
        change = self._getChange(project, int(num), patchset=sha)
        changes.append(change)

    return changes
def _updateChange(self, change):
    """Refresh ``change`` from the current pull request data on GitHub."""
    self.log.info("Updating %s" % (change,))
    change.pr, pr_obj = self.getPull(change.project.name, change.number)
    change.ref = "refs/pull/%s/head" % change.number
    change.branch = change.pr.get('base').get('ref')

    # Don't overwrite the files list. The change object is bound to a
    # specific revision and thus the changed files won't change. This is
    # important if we got the files later because of the 300 files limit.
    if not change.files:
        change.files = change.pr.get('files')
        # Github's pull requests files API only returns at max
        # the first 300 changed files of a PR in alphabetical order.
        if len(change.files) < change.pr.get('changed_files', 0):
            self.log.warning("Got only %s files but PR has %s files.",
                             len(change.files),
                             change.pr.get('changed_files', 0))
            # In this case explicitly set change.files to None to signalize
            # that we need to ask the mergers later in pipeline processing.
            # We cannot query the files here using the mergers because this
            # can slow down the github event queue considerably.
            change.files = None
    change.title = change.pr.get('title')
    change.open = change.pr.get('state') == 'open'
    change.is_merged = change.pr.get('merged')
    change.status = self._get_statuses(change.project,
                                       change.patchset)
    change.reviews = self.getPullReviews(pr_obj, change.project,
                                         change.number)
    change.labels = change.pr.get('labels')
    # ensure message is at least an empty string
    message = change.pr.get("body") or ""
    if change.title:
        if message:
            message = "{}\n\n{}".format(change.title, message)
        else:
            message = change.title
    change.message = message

    # Note(tobiash): The updated_at timestamp is a moving target that is
    # not bound to the pull request 'version' we can solve that by just not
    # updating the timestamp if the pull request is updated in the cache.
    # This way the old pull request object retains its old timestamp and
    # the update check works.
    if not change.updated_at:
        change.updated_at = self._ghTimestampToDate(
            change.pr.get('updated_at'))
    change.url = change.pr.get('url')
    change.uris = [
        '%s/%s/pull/%s' % (self.server, change.project.name,
                           change.number),
    ]

    if self.sched:
        self.sched.onChangeUpdated(change)

    return change
def getGitUrl(self, project: Project):
    """Return the clone URL for ``project``.

    Uses ssh when an ssh key is configured, an installation token URL
    when authenticated as an app, and plain https otherwise.
    """
    if self.git_ssh_key:
        return 'ssh://git@%s/%s.git' % (self.server, project.name)

    # if app_id is configured but self.app_id is empty we are not
    # authenticated yet against github as app
    if not self.app_id and self.connection_config.get('app_id', None):
        self._authenticateGithubAPI()
        self._prime_installation_map()

    if self.app_id:
        # We may be in the context of a merger or executor here. The
        # mergers and executors don't receive webhook events so they miss
        # new repository installations. In order to cope with this we need
        # to reprime the installation map if we don't find the repo there.
        installation_key = self._get_installation_key(project.name,
                                                      reprime=True)
        return 'https://x-access-token:%s@%s/%s' % (installation_key,
                                                    self.server,
                                                    project.name)

    return 'https://%s/%s' % (self.server, project.name)
def getGitwebUrl(self, project, sha=None):
    """Return the web URL of ``project``, or of commit ``sha`` if given."""
    url = 'https://%s/%s' % (self.server, project)
    if sha is not None:
        url += '/commit/%s' % sha
    return url
def getProject(self, name):
    """Return the registered project called ``name``, or None."""
    return self.projects.get(name)
def addProject(self, project):
    """Register ``project`` under its name."""
    self.projects[project.name] = project
def clearBranchCache(self):
    """Reset both project branch caches to empty dicts."""
    self._project_branch_cache_exclude_unprotected = {}
    self._project_branch_cache_include_unprotected = {}
def getProjectBranches(self, project, tenant):
    """Return the branch names of ``project``, using the branch cache.

    Only protected branches are requested when the tenant excludes
    unprotected branches for the project. A 403 response yields an empty
    list that is not cached.
    """
    exclude_unprotected = tenant.getExcludeUnprotectedBranches(project)
    if exclude_unprotected:
        cache = self._project_branch_cache_exclude_unprotected
    else:
        cache = self._project_branch_cache_include_unprotected

    branches = cache.get(project.name)
    if branches is not None:
        return branches

    github = self.getGithubClient(project.name)
    url = github.session.build_url('repos', project.name,
                                   'branches')

    headers = {'Accept': 'application/vnd.github.loki-preview+json'}
    params = {'per_page': 100}
    if exclude_unprotected:
        params['protected'] = 1

    branches = []
    while url:
        resp = github.session.get(
            url, headers=headers, params=params)

        # check if we need to do further paged calls
        url = resp.links.get('next', {}).get('url')

        if resp.status_code == 403:
            self.log.error(str(resp))
            rate_limit = github.rate_limit()
            if rate_limit['resources']['core']['remaining'] == 0:
                self.log.warning(
                    "Rate limit exceeded, using empty branch list")
            return []

        branches.extend([x['name'] for x in resp.json()])

    self.log_rate_limit(self.log, github)
    cache[project.name] = branches
    return branches
def getBranch(self, project_name, branch):
    """Return the branch data from the API, or None on a 404 response."""
    github = self.getGithubClient(project_name)

    # Note that we directly use a web request here because if we use the
    # api directly we need a repository object which needs
    # an unneeded web request during creation.
    url = github.session.build_url('repos', project_name,
                                   'branches', branch)

    resp = github.session.get(url)

    if resp.status_code == 404:
        return None

    return resp.json()
def getPullUrl(self, project, number):
    """Return the web URL of pull request ``number`` in ``project``."""
    return '%s/pull/%s' % (self.getGitwebUrl(project), number)
def getPull(self, project_name, number, log=None):
    """Fetch a pull request with its file names and labels.

    :param str project_name: ``owner/repo``.
    :param number: pull request number.
    :param log: logger for the success message; defaults to ``self.log``.
    :returns: ``(pr_dict, github3_pull_request_object)``.
    """
    if log is None:
        log = self.log
    github = self.getGithubClient(project_name)
    owner, proj = project_name.split('/')
    # The API may return None for the pull request; retry a few times.
    for retry in range(5):
        probj = github.pull_request(owner, proj, number)
        if probj is not None:
            break
        self.log.warning("Pull request #%s of %s/%s returned None!" % (
                         number, owner, proj))
        time.sleep(1)
    # Get the issue obj so we can get the labels (this is silly)
    issueobj = probj.issue()
    pr = probj.as_dict()
    try:
        pr['files'] = [f.filename for f in probj.files()]
    except github3.exceptions.ServerError as exc:
        # NOTE: For PRs with a lot of lines changed, Github will return
        # an error (HTTP 500) because it can't generate the diff.
        self.log.warning("Failed to get list of files from Github. "
                         "Using empty file list to trigger update "
                         "via the merger: %s", exc)
        pr['files'] = []

    pr['labels'] = [label.name for label in issueobj.labels()]

    self._sha_pr_cache.update(project_name, pr)

    log.debug('Got PR %s#%s', project_name, number)
    self.log_rate_limit(self.log, github)
    return (pr, probj)
def canMerge(self, change, allow_needs):
    """Return whether branch protection allows merging ``change``.

    Only the required status checks are evaluated; review requirements
    are not implemented yet.
    """
    # NOTE: The mergeable call may get a false (null) while GitHub is
    # calculating if it can merge. The library will just return
    # that as false. This could lead to false negatives. So don't do this
    # call here and only evaluate branch protection settings. Any merge
    # conflicts which would block merging finally will be detected by
    # the zuul-mergers anyway.

    github = self.getGithubClient(change.project.name)
    owner, proj = change.project.name.split('/')
    pull = github.pull_request(owner, proj, change.number)

    protection = self._getBranchProtection(
        change.project.name, change.branch)

    if not self._hasRequiredStatusChecks(allow_needs, protection, pull):
        return False

    required_reviews = protection.get(
        'required_pull_request_reviews')
    if required_reviews:
        if required_reviews.get('require_code_owner_reviews'):
            # we need to process the reviews using code owners
            # TODO(tobiash): not implemented yet
            pass
        else:
            # we need to process the review using access rights
            # TODO(tobiash): not implemented yet
            pass

    return True
def getPullBySha(self, sha, project_name, log):
    """Return the dict of the pull request whose head commit is ``sha``.

    The sha -> pull request cache is consulted first.  On a miss, the
    open pull requests of the repository are listed; each of them is
    added to the cache while searching.

    :param sha: Head commit sha to look for.
    :param project_name: Repository in ``owner/name`` form.
    :param log: Logger used for the debug message and passed on to
        :meth:`getPull`.
    :returns: The pull request dict, or ``None`` if nothing matches.
    :raises Exception: If more than one pull request has that head sha.
    """
    cached_pr_numbers = self._sha_pr_cache.get(project_name, sha)
    if len(cached_pr_numbers) > 1:
        raise Exception('Multiple pulls found with head sha %s' % sha)
    if len(cached_pr_numbers) == 1:
        number = next(iter(cached_pr_numbers))
        pr_body, _pr_obj = self.getPull(project_name, number, log)
        return pr_body

    pulls = []
    github = self.getGithubClient(project_name)
    owner, project = project_name.split('/')
    repo = github.repository(owner, project)
    # We sort by updated from oldest to newest as that will prefer more
    # recently updated PRs in our LRU cache.
    for pr in repo.pull_requests(state='open',
                                 sort='updated',
                                 direction='asc'):
        pr_dict = pr.as_dict()
        self._sha_pr_cache.update(project_name, pr_dict)
        if pr.head.sha != sha:
            continue
        if pr_dict in pulls:
            continue
        pulls.append(pr_dict)

    log.debug('Got PR on project %s for sha %s', project_name, sha)
    self.log_rate_limit(self.log, github)
    if len(pulls) > 1:
        raise Exception('Multiple pulls found with head sha %s' % sha)

    if not pulls:
        return None
    return pulls.pop()
def getPullReviews(self, pr_obj, project, number):
    """Collect the effective review of each user on a pull request.

    For every reviewer a single review dict is kept with the keys
    ``by`` (``username``/``email``), ``grantedOn``, ``type``,
    ``submitted_at`` and ``permission`` (``read``, ``write`` or
    ``admin``).

    :param pr_obj: Pull request object providing ``reviews()``.
    :param project: Project object; its ``name`` is used to look up
        the reviewers' repository permissions.
    :param number: Pull request number (only used for logging).
    :returns: A view of the per-user review dicts.
    """
    # make a list out of the reviews so that we complete our
    # API transaction
    revs = [review.as_dict() for review in pr_obj.reviews()]
    self.log.debug('Got reviews for PR %s#%s', project, number)

    permissions = {}
    reviews = {}

    for rev in revs:
        user = rev.get('user').get('login')
        submitted_at = rev.get('submitted_at')
        # NOTE(review): time.mktime() treats the parsed struct as local
        # time although the timestamp carries a 'Z' suffix -- confirm
        # that consumers of 'grantedOn' expect this.
        review = {
            'by': {
                'username': user,
                'email': rev.get('user').get('email'),
            },
            'grantedOn': int(time.mktime(
                self._ghTimestampToDate(submitted_at))),
            'type': rev.get('state').lower(),
            'submitted_at': submitted_at,
            # A user always has read to leave a review
            'permission': 'read',
        }

        # Get user's rights, asking the API only once per user.
        if user in permissions:
            permission = permissions[user]
        else:
            permission = self.getRepoPermission(project.name, user)
            permissions[user] = permission

        if permission in ('write', 'admin'):
            review['permission'] = permission

        if user not in reviews:
            reviews[user] = review
            continue

        # if there are multiple reviews per user, keep the newest
        # note that this breaks the ability to set the 'older-than'
        # option on a review requirement.
        # BUT do not keep the latest if it's a 'commented' type and the
        # previous review was 'approved' or 'changes_requested', as
        # the GitHub model does not change the vote if a comment is
        # added after the fact. THANKS GITHUB!
        previous = reviews[user]
        if review['grantedOn'] > previous['grantedOn']:
            if (review['type'] == 'commented' and
                    previous['type'] in ('approved', 'changes_requested')):
                self.log.debug("Discarding comment review %s due to "
                               "an existing vote %s", review, previous)
            else:
                reviews[user] = review

    return reviews.values()
def _getBranchProtection(self, project_name: str, branch: str):
    """Return the branch protection settings of ``branch``.

    :param project_name: Repository in ``owner/name`` form.
    :param branch: Branch name.
    :returns: The decoded JSON response, or an empty dict when the API
        answers with 404.
    """
    github = self.getGithubClient(project_name)
    url = github.session.build_url('repos', project_name,
                                   'branches', branch,
                                   'protection')

    # The request is made with a preview media type in the Accept header.
    headers = {'Accept': 'application/vnd.github.loki-preview+json'}
    resp = github.session.get(url, headers=headers)

    if resp.status_code == 404:
        return {}

    return resp.json()
def _hasRequiredStatusChecks(self, allow_needs, protection, pull):
    """Check that all required status contexts of a PR are successful.

    :param allow_needs: Contexts to drop from the required set.
    :param protection: Branch protection settings dict (may be empty).
    :param pull: Pull request object providing ``commits()`` and
        ``head.sha``.
    :returns: ``True`` when nothing is required or every remaining
        required context has a successful status on the head commit;
        ``False`` otherwise, including when the head commit cannot be
        found in the commit list.
    """
    if not protection:
        # There are no protection settings -> ok by definition
        return True

    required_contexts = protection.get(
        'required_status_checks', {}).get('contexts')

    if not required_contexts:
        # There are no required contexts -> ok by definition
        return True

    # Strip allow_needs as we will set this in the gate ourselves
    required_contexts = {
        x for x in required_contexts if x not in allow_needs}

    # NOTE(tobiash): We cannot just take the last commit in the list
    # because it is not sorted that the head is the last one in every case.
    # E.g. when doing a re-merge from the target the PR head can be
    # somewhere in the middle of the commit list. Thus we need to search
    # the whole commit list for the PR head commit which has the statuses
    # attached.
    commits = list(pull.commits())
    head_commit = None
    for c in commits:
        if c.sha == pull.head.sha:
            head_commit = c
            break

    if head_commit is None:
        # Without the head commit there are no statuses to evaluate;
        # report the requirement as unmet rather than failing on None.
        self.log.warning("Head commit %s not found in the commit list "
                         "of the pull request", pull.head.sha)
        return False

    # Get successful statuses
    successful = {s.context for s in head_commit.status().statuses
                  if s.state == 'success'}

    # Required contexts must be a subset of the successful contexts as
    # we allow additional successful status contexts we don't care about.
    return required_contexts.issubset(successful)
def getUser(self, login, project):
    """Return a ``GithubUser`` for ``login`` bound to this connection.

    :param login: User login name.
    :param project: Project passed through to ``GithubUser``.
    """
    return GithubUser(login, self, project)
def getUserUri(self, login):
    """Return the https URL of the user ``login`` on ``self.server``."""
    return 'https://%s/%s' % (self.server, login)
def getRepoPermission(self, project, login):
    """Return the permission level of ``login`` on a repository.

    :param project: Repository in ``owner/name`` form.
    :param login: User login name.
    :returns: The ``permission`` value reported by the API, or
        ``'none'`` when the repository or the user is not found or the
        response carries no permission.
    """
    github = self.getGithubClient(project)
    owner, proj = project.split('/')
    # This gets around a missing API call
    # need preview header
    headers = {'Accept': 'application/vnd.github.korra-preview'}

    # Create a repo object
    repository = github.repository(owner, proj)
    if not repository:
        return 'none'

    # Build up a URL
    url = repository._build_url('collaborators', login, 'permission',
                                base_url=repository._api)
    # Get the data
    perms = repository._get(url, headers=headers)

    self.log.debug("Got repo permissions for %s/%s", owner, proj)
    self.log_rate_limit(self.log, github)

    # no known user, maybe deleted since review?
    if perms.status_code == 404:
        return 'none'

    # get permissions from the data
    return perms.json().get('permission', 'none')
def commentPull(self, project, pr_number, message):
    """Post ``message`` as a comment on a pull request.

    The comment is created through the issue object that carries the
    pull request's number.

    :param project: Repository in ``owner/name`` form.
    :param pr_number: Pull request number.
    :param message: Comment text.
    """
    github = self.getGithubClient(project)
    owner, proj = project.split('/')
    repository = github.repository(owner, proj)
    pull_request = repository.issue(pr_number)
    pull_request.create_comment(message)
    self.log.debug("Commented on PR %s/%s#%s", owner, proj, pr_number)
    self.log_rate_limit(self.log, github)
def mergePull(self, project, pr_number, commit_message='', sha=None):
    """Merge a pull request.

    :param project: Repository in ``owner/name`` form.
    :param pr_number: Pull request number.
    :param commit_message: Message passed to the merge call.
    :param sha: Sha passed to the merge call.
    :raises MergeFailure: If the API rejects the merge with
        "method not allowed".
    :raises Exception: If the merge call reports a falsy result.
    """
    github = self.getGithubClient(project)
    owner, proj = project.split('/')
    pull_request = github.pull_request(owner, proj, pr_number)
    try:
        result = pull_request.merge(commit_message=commit_message, sha=sha)
    except github3.exceptions.MethodNotAllowed as e:
        # Keep the API error attached as the cause for easier debugging.
        raise MergeFailure('Merge was not successful due to mergeability'
                           ' conflict, original error is %s' % e) from e

    self.log.debug("Merged PR %s/%s#%s", owner, proj, pr_number)
    self.log_rate_limit(self.log, github)
    if not result:
        raise Exception('Pull request was not merged')
def _getCommit(self, repository, sha, retries=5):
    """Return the commit ``sha`` of ``repository``, retrying if not found.

    :param repository: Repository object providing ``commit()`` and
        ``name``.
    :param sha: Commit sha to fetch.
    :param retries: Number of additional attempts made after the first
        "not found" answer.
    :raises github3.exceptions.NotFoundError: If the commit is still not
        found once the retries are used up.
    """
    while True:
        try:
            return repository.commit(sha)
        except github3.exceptions.NotFoundError:
            self.log.warning("Commit %s of project %s returned None",
                             sha, repository.name)
            if retries <= 0:
                raise
            retries -= 1
            # Pause before asking again.
            time.sleep(1)
def getCommitStatuses(self, project, sha):
    """Return the statuses of commit ``sha`` as a list of dicts.

    :param project: Repository in ``owner/name`` form.
    :param sha: Commit sha.
    """
    github = self.getGithubClient(project)
    owner, proj = project.split('/')
    repository = github.repository(owner, proj)

    commit = self._getCommit(repository, sha, 5)

    # make a list out of the statuses so that we complete our
    # API transaction
    statuses = [status.as_dict() for status in commit.statuses()]

    self.log.debug("Got commit statuses for sha %s on %s", sha, project)
    self.log_rate_limit(self.log, github)
    return statuses
def setCommitStatus(self, project, sha, state, url='', description='',
                    context=''):
    """Create a commit status on ``sha``.

    :param project: Repository in ``owner/name`` form.
    :param sha: Commit sha the status is attached to.
    :param state: Status state.
    :param url: Target URL of the status.
    :param description: Short description of the status.
    :param context: Status context.
    """
    github = self.getGithubClient(project)
    owner, proj = project.split('/')
    repository = github.repository(owner, proj)
    repository.create_status(sha, state, url, description, context)
    self.log.debug("Set commit status to %s for sha %s on %s",
                   state, sha, project)
    self.log_rate_limit(self.log, github)
def labelPull(self, project, pr_number, label):
    """Add ``label`` to a pull request via its issue object.

    :param project: Repository in ``owner/name`` form.
    :param pr_number: Pull request number.
    :param label: Label name to add.
    """
    github = self.getGithubClient(project)
    owner, proj = project.split('/')
    pull_request = github.issue(owner, proj, pr_number)
    pull_request.add_labels(label)
    self.log.debug("Added label %s to %s#%s", label, proj, pr_number)
    self.log_rate_limit(self.log, github)
def unlabelPull(self, project, pr_number, label):
    """Remove ``label`` from a pull request via its issue object.

    :param project: Repository in ``owner/name`` form.
    :param pr_number: Pull request number.
    :param label: Label name to remove.
    """
    github = self.getGithubClient(project)
    owner, proj = project.split('/')
    pull_request = github.issue(owner, proj, pr_number)
    pull_request.remove_label(label)
    self.log.debug("Removed label %s from %s#%s", label, proj, pr_number)
    self.log_rate_limit(self.log, github)
def getPushedFileNames(self, event):
    """Return the names of all files touched by the commits of a push.

    :param event: Event whose ``commits`` is an iterable of dicts with
        optional ``added``, ``modified`` and ``removed`` file lists.
    :returns: Sorted list of unique file names.
    """
    files = set()
    for commit in event.commits:
        # A commit may lack one of the lists; treat that as empty
        # instead of failing on ``None + list``.
        for key in ('added', 'modified', 'removed'):
            files.update(commit.get(key) or [])
    return sorted(files)
def _ghTimestampToDate(self, timestamp):
    """Parse a ``YYYY-MM-DDTHH:MM:SSZ`` timestamp into a ``struct_time``.

    :raises ValueError: If ``timestamp`` does not match that format.
    """
    return time.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ')
def _get_statuses(self, project, sha):
    """Return the newest status per user and context for a commit.

    :param project: Project object; its ``name`` selects the repository.
    :param sha: Commit sha.
    :returns: List of ``"user:context:state"`` strings.
    """
    # A ref can have more than one status from each context,
    # however the API returns them in order, newest first.
    # So we can keep track of which contexts we've already seen
    # and throw out the rest. Our unique key is based on
    # the user and the context, since context is free form and anybody
    # can put whatever they want there. We want to ensure we track it
    # by user, so that we can require/trigger by user too.
    seen = set()
    statuses = []
    for status in self.getCommitStatuses(project.name, sha):
        stuple = _status_as_tuple(status)
        key = "%s:%s" % (stuple[0], stuple[1])
        if key not in seen:
            statuses.append("%s:%s:%s" % stuple)
            seen.add(key)

    return statuses
def getWebController(self, zuul_web):
    """Return a ``GithubWebController`` serving this connection.

    :param zuul_web: Web application object handed to the controller.
    """
    return GithubWebController(zuul_web, self)
def validateWebConfig(self, config, connections):
    """Check that the connection is configured for receiving webhooks.

    :param config: Unused here.
    :param connections: Unused here.
    :returns: ``True`` when a ``webhook_token`` is configured.
    :raises Exception: If ``webhook_token`` is missing from the
        connection config.
    """
    if 'webhook_token' not in self.connection_config:
        raise Exception(
            "webhook_token not found in config for connection %s" %
            self.connection_name)
    return True
def log_rate_limit(self, log, github):
    """Log the remaining core API rate limit of ``github``.

    Does nothing when rate limit logging is disabled
    (``self._log_rate_limit`` is falsy) or when the rate limit cannot
    be obtained.

    :param log: Logger receiving the debug message.
    :param github: Client providing ``rate_limit()`` and the
        ``_zuul_user_id`` / ``_zuul_project`` attributes.
    """
    if not self._log_rate_limit:
        return

    try:
        rate_limit = github.rate_limit()
        core = rate_limit['resources']['core']
        remaining = core['remaining']
        reset = core['reset']
    except Exception:
        # Best effort only: a failure to read the rate limit must not
        # break the API operation that was just logged.
        return

    if github._zuul_user_id:
        log.debug('GitHub API rate limit (%s, %s) remaining: %s reset: %s',
                  github._zuul_project, github._zuul_user_id, remaining,
                  reset)
    else:
        log.debug('GitHub API rate limit remaining: %s reset: %s',
                  remaining, reset)
def _clearBranchCache(self, project, log):
    """Drop the cached branch lists of ``project`` from both caches.

    :param project: Project object; its ``name`` is the cache key.
    :param log: Logger receiving the debug message.
    """
    log.debug("Clearing branch cache for %s", project.name)
    for cache in [
            self._project_branch_cache_exclude_unprotected,
            self._project_branch_cache_include_unprotected,
    ]:
        # A project that was never cached is fine.
        try:
            del cache[project.name]
        except KeyError:
            pass
def checkBranchCache(self, project, branch, protected, log):
    """Invalidate the protected-branch cache if it disagrees with reality.

    :param project: Project object; its ``name`` is the cache key.
    :param branch: Branch name that was observed.
    :param protected: Whether that branch is currently protected.
    :param log: Logger receiving the debug message.
    """
    # If the branch appears in the exclude_unprotected cache but
    # is unprotected, clear the exclude cache.

    # If the branch does not appear in the exclude_unprotected
    # cache but is protected, clear the exclude cache.

    # All branches should always appear in the include_unprotected
    # cache, so we never clear it.

    cache = self._project_branch_cache_exclude_unprotected
    branches = cache.get(project.name, [])
    # Both stale cases boil down to "cached membership differs from the
    # protection state".
    if (branch in branches) != bool(protected):
        log.debug("Clearing protected branch cache for %s",
                  project.name)
        try:
            del cache[project.name]
        except KeyError:
            pass
class GithubWebController(BaseWebController):
    """Web controller receiving GitHub webhook payloads for a connection."""

    log = logging.getLogger("zuul.GithubWebController")

    def __init__(self, zuul_web, connection):
        """Remember the web app, the connection and its webhook token."""
        self.connection = connection
        self.zuul_web = zuul_web
        # Shared secret used to verify the signature of each payload.
        self.token = self.connection.connection_config.get('webhook_token')

    def _validate_signature(self, body, headers):
        """Verify the ``x-hub-signature`` header against ``body``.

        :param body: Raw request body.
        :param headers: Request headers with lower-cased keys.
        :returns: ``True`` when the signature matches.
        :raises cherrypy.HTTPError: With status 401 when the header is
            missing or the signature does not match.
        """
        try:
            request_signature = headers['x-hub-signature']
        except KeyError:
            raise cherrypy.HTTPError(401, 'X-Hub-Signature header missing.')

        payload_signature = _sign_request(body, self.token)

        self.log.debug("Payload Signature: {0}".format(str(payload_signature)))
        self.log.debug("Request Signature: {0}".format(str(request_signature)))
        # Constant-time comparison of the two signatures.
        if not hmac.compare_digest(
                str(payload_signature), str(request_signature)):
            raise cherrypy.HTTPError(
                401,
                'Request signature does not match calculated payload '
                'signature. Check that secret is correct.')

        return True

    @cherrypy.expose
    @cherrypy.tools.json_out(content_type='application/json; charset=utf-8')
    def payload(self):
        """Validate a webhook delivery and hand it over via RPC.

        :returns: The decoded first data element of the submitted job.
        """
        # Note(tobiash): We need to normalize the headers. Otherwise we will
        # have trouble to get them from the dict afterwards.
        # e.g.
        # GitHub: sent: X-GitHub-Event received: X-GitHub-Event
        # urllib: sent: X-GitHub-Event received: X-Github-Event
        #
        # We cannot easily solve this mismatch as every http processing lib
        # modifies the header casing in its own way and by specification http
        # headers are case insensitive so just lowercase all so we don't have
        # to take care later.
        # Note(corvus): Don't use cherrypy's json_in here so that we
        # can validate the signature.
        headers = {key.lower(): value
                   for key, value in cherrypy.request.headers.items()}
        body = cherrypy.request.body.read()
        self._validate_signature(body, headers)
        # We cannot send the raw body through gearman, so it's easy to just
        # encode it as json, after decoding it as utf-8
        json_body = json.loads(body.decode('utf-8'))

        job = self.zuul_web.rpc.submitJob(
            'github:%s:payload' % self.connection.connection_name,
            {'headers': headers, 'body': json_body})

        return json.loads(job.data[0])
def _status_as_tuple(status):
    """Translate a status into a tuple of user, context, state.

    The user is ``"Unknown"`` when the status has no (or an empty)
    ``creator`` entry; otherwise it is the creator's ``login``.
    """
    creator = status.get('creator')
    user = creator.get('login') if creator else "Unknown"
    context = status.get('context')
    state = status.get('state')
    return (user, context, state)