Add support for GHE repository cache

Change-Id: Iec87857aa58f71875d780da3698047dae01120d7
This commit is contained in:
James E. Blair 2022-05-04 16:08:57 -07:00
parent 4151f91684
commit c41fcbe483
9 changed files with 195 additions and 14 deletions

View File

@ -182,6 +182,37 @@ The supported options in ``zuul.conf`` connections are:
Enable or disable GitHub rate limit logging. If rate limiting is disabled
in GitHub Enterprise this can save some network round trip times.
.. attr:: repo_cache
To configure Zuul to use a GitHub Enterprise `repository cache
<https://docs.github.com/en/enterprise-server@3.3/admin/enterprise-management/caching-repositories/about-repository-caching>`_
set this value to the hostname of the cache (e.g.,
``europe-ci.github.example.com``). Zuul will fetch commits as
well as determine the global repo state of repositories used in
jobs from this host.
This setting is incompatible with :attr:`<github
connection>.sshkey`.
Because the repository cache may be several minutes behind the
canonical site, enabling this setting automatically sets the
default :attr:`<github connection>.repo_retry_timeout` to 600
seconds. That setting may still be overidden to specify a
different value.
.. attr:: repo_retry_timeout
This setting is only used if :attr:`<github
connection>.repo_cache` is set. It specifies the amount of time
in seconds that Zuul mergers and executors should spend
attempting to fetch git commits which are not available from the
GitHub repository cache host.
When :attr:`<github connection>.repo_cache` is set, this value
defaults to 600 seconds, but it can be overridden. Zuul retries
git fetches every 30 seconds, and this value will be rounded up
to the next highest multiple of 30 seconds.
Trigger Configuration
---------------------
GitHub webhook events can be configured as triggers.

View File

@ -0,0 +1,8 @@
---
features:
- |
The GitHub driver now has support for using a GitHub Enterprise
`repository cache
<https://docs.github.com/en/enterprise-server@3.3/admin/enterprise-management/caching-repositories/about-repository-caching>`_.
See :attr:`<github connection>.repo_cache` for information on how
to configure it.

View File

@ -333,11 +333,9 @@ class GitlabDriverMock(GitlabDriver):
class TestConnectionRegistry(ConnectionRegistry):
def __init__(self, changes: Dict[str, Dict[str, Change]],
config: ConfigParser, additional_event_queues,
upstream_root: str, poller_events,
git_url_with_auth: bool,
add_cleanup: Callable[[Callable[[], None]], None]):
def __init__(self, changes, config, additional_event_queues,
upstream_root, poller_events, git_url_with_auth,
add_cleanup):
self.connections = OrderedDict()
self.drivers = {}
@ -2952,6 +2950,8 @@ class FakeGithubConnection(githubconnection.GithubConnection):
prefix = 'file://x-access-token:%s@' % auth_token
else:
prefix = ''
if self.repo_cache:
return prefix + os.path.join(self.repo_cache, str(project))
return prefix + os.path.join(self.upstream_root, str(project))
def real_getGitUrl(self, project):

View File

@ -23,12 +23,14 @@ import textwrap
from unittest import mock, skip
import git
import gitdb
import github3.exceptions
from tests.fakegithub import FakeFile, FakeGithubEnterpriseClient
from zuul.driver.github.githubconnection import GithubShaCache
from zuul.zk.layout import LayoutState
from zuul.lib import strings
from zuul.merger.merger import Repo
from zuul.model import MergeRequest, EnqueueEvent, DequeueEvent
from tests.base import (AnsibleZuulTestCase, BaseTestCase,
@ -2407,7 +2409,7 @@ class TestCheckRunAnnotations(ZuulGithubAppTestCase, AnsibleZuulTestCase):
})
class TestGithubDriverEnterise(ZuulGithubAppTestCase):
class TestGithubDriverEnterprise(ZuulGithubAppTestCase):
config_file = 'zuul-github-driver-enterprise.conf'
scheduler_count = 1
@ -2445,7 +2447,7 @@ class TestGithubDriverEnterise(ZuulGithubAppTestCase):
self.assertEqual(len(A.comments), 0)
class TestGithubDriverEnteriseLegacy(ZuulGithubAppTestCase):
class TestGithubDriverEnterpriseLegacy(ZuulGithubAppTestCase):
config_file = 'zuul-github-driver-enterprise.conf'
scheduler_count = 1
@ -2485,3 +2487,103 @@ class TestGithubDriverEnteriseLegacy(ZuulGithubAppTestCase):
r'.*I shouldnt be seen.*',
re.DOTALL)))
self.assertEqual(len(A.comments), 0)
class TestGithubDriverEnterpriseCache(ZuulGithubAppTestCase):
config_file = 'zuul-github-driver-enterprise.conf'
scheduler_count = 1
def setup_config(self, config_file):
self.upstream_cache_root = self.upstream_root + '-cache'
config = super().setup_config(config_file)
# This adds the GHE repository cache feature
config.set('connection github', 'repo_cache', self.upstream_cache_root)
config.set('connection github', 'repo_retry_timeout', '30')
# Synchronize the upstream repos to the upstream repo cache
self.synchronize_repo('org/common-config')
self.synchronize_repo('org/project')
return config
def init_repo(self, project, tag=None):
super().init_repo(project, tag)
# After creating the upstream repo, also create the empty
# cache repo (but unsynchronized for now)
parts = project.split('/')
path = os.path.join(self.upstream_cache_root, *parts[:-1])
if not os.path.exists(path):
os.makedirs(path)
path = os.path.join(self.upstream_cache_root, project)
repo = git.Repo.init(path)
with repo.config_writer() as config_writer:
config_writer.set_value('user', 'email', 'user@example.com')
config_writer.set_value('user', 'name', 'User Name')
def synchronize_repo(self, project):
# Synchronize the upstream repo to the cache
upstream_path = os.path.join(self.upstream_root, project)
upstream = git.Repo(upstream_path)
cache_path = os.path.join(self.upstream_cache_root, project)
cache = git.Repo(cache_path)
refs = upstream.git.for_each_ref(
'--format=%(objectname) %(refname)'
)
for ref in refs.splitlines():
parts = ref.split(" ")
if len(parts) == 2:
commit, ref = parts
else:
continue
self.log.debug("Synchronize ref %s: %s", ref, commit)
cache.git.fetch(upstream_path, ref)
binsha = gitdb.util.to_bin_sha(commit)
obj = git.objects.Object.new_from_sha(cache, binsha)
git.refs.Reference.create(cache, ref, obj, force=True)
@simple_layout('layouts/merging-github.yaml', driver='github')
def test_github_repo_cache(self):
# Test that we fetch and configure retries correctly when
# using a github enterprise repo cache (the cache can be
# slightly out of sync).
github = self.fake_github.getGithubClient()
repo = github.repo_from_project('org/project')
repo._set_branch_protection('master', require_review=True)
# Make sure we have correctly overridden the retry attempts
merger = self.executor_server.merger
repo = merger.getRepo('github', 'org/project')
self.assertEqual(repo.retry_attempts, 1)
# Our initial attempt should fail; make it happen quickly
self.patch(Repo, 'retry_interval', 1)
# pipeline merges the pull request on success
A = self.fake_github.openFakePullRequest('org/project', 'master',
'PR title',
body='I shouldnt be seen',
body_text='PR body')
A.addReview('user', 'APPROVED')
self.fake_github.emitEvent(A.getCommentAddedEvent('merge me'))
self.waitUntilSettled('initial failed attempt')
self.assertFalse(A.is_merged)
# Now synchronize the upstream repo to the cache and try again
self.synchronize_repo('org/project')
self.fake_github.emitEvent(A.getCommentAddedEvent('merge me'))
self.waitUntilSettled('second successful attempt')
self.assertTrue(A.is_merged)
self.assertThat(A.merge_message,
MatchesRegex(r'.*PR title\n\nPR body.*', re.DOTALL))
self.assertThat(A.merge_message,
Not(MatchesRegex(
r'.*I shouldnt be seen.*',
re.DOTALL)))
self.assertEqual(len(A.comments), 0)

View File

@ -2872,6 +2872,7 @@ class TestWebMulti(BaseTestWeb):
'driver': 'github',
'name': 'github',
'server': 'github.com',
'repo_cache': None,
}
self.assertEqual([gerrit_connection, github_connection], data)

View File

@ -1267,6 +1267,21 @@ class GithubConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
self.server = self.connection_config.get('server', 'github.com')
self.canonical_hostname = self.connection_config.get(
'canonical_hostname', self.server)
self.repo_cache = self.connection_config.get('repo_cache')
if self.git_ssh_key and self.repo_cache:
self.log.warning("Both sshkey and repo_cache specified "
"but are incompatible; "
"repo_cache will be ignored")
self.repo_cache = None
if self.repo_cache:
rrt = self.connection_config.get('repo_retry_timeout')
if rrt:
self.repo_retry_timeout = int(rrt)
else:
self.repo_retry_timeout = None
else:
self.repo_retry_timeout = None
self.source = driver.getSource(self)
self._sha_pr_cache = GithubShaCache()
@ -1288,6 +1303,7 @@ class GithubConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
"baseurl": self._github_client_manager.base_url,
"canonical_hostname": self.canonical_hostname,
"server": self.server,
"repo_cache": self.repo_cache,
})
return d
@ -1667,6 +1683,11 @@ class GithubConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
if not self._github_client_manager.initialized:
self._github_client_manager.initialize()
if self.repo_cache:
server = self.repo_cache
else:
server = self.server
if self._github_client_manager.usesAppAuthentication:
# We may be in the context of a merger or executor here. The
# mergers and executors don't receive webhook events so they miss
@ -1676,10 +1697,10 @@ class GithubConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
self._github_client_manager.get_installation_key(
project.name, reprime=True)
return 'https://x-access-token:%s@%s/%s' % (installation_key,
self.server,
server,
project.name)
return 'https://%s/%s' % (self.server, project.name)
return 'https://%s/%s' % (server, project.name)
def getGitwebUrl(self, project, sha=None, tag=None):
url = 'https://%s/%s' % (self.server, project)

View File

@ -150,6 +150,10 @@ class GithubSource(BaseSource):
"""Get the git url for a project."""
return self.connection.getGitUrl(project)
def getRetryTimeout(self, project):
"""Get the retry timeout for a project."""
return self.connection.repo_retry_timeout
def getGitwebUrl(self, project, sha=None):
"""Get the git-web url for a project."""
return self.connection.getGitwebUrl(project, sha)

View File

@ -19,6 +19,7 @@ from typing import Optional
from urllib.parse import urlsplit, urlunsplit, urlparse
import hashlib
import logging
import math
import os
import re
import shutil
@ -74,7 +75,7 @@ class Repo(object):
def __init__(self, remote, local, email, username, speed_limit, speed_time,
sshkey=None, cache_path=None, logger=None, git_timeout=300,
zuul_event_id=None):
zuul_event_id=None, retry_timeout=None):
if logger is None:
self.log = logging.getLogger("zuul.Repo")
else:
@ -85,6 +86,9 @@ class Repo(object):
'GIT_HTTP_LOW_SPEED_TIME': speed_time,
}
self.git_timeout = git_timeout
if retry_timeout:
self.retry_attempts = math.ceil(
retry_timeout / self.retry_interval)
self.sshkey = sshkey
if sshkey:
self.env['GIT_SSH_COMMAND'] = 'ssh -i %s' % (sshkey,)
@ -859,7 +863,7 @@ class Merger(object):
f.write(self.scheme)
def _addProject(self, hostname, connection_name, project_name, url, sshkey,
zuul_event_id, process_worker=None):
zuul_event_id, process_worker=None, retry_timeout=None):
repo = None
key = '/'.join([hostname, project_name])
try:
@ -878,7 +882,7 @@ class Merger(object):
url, path, self.email, self.username, self.speed_limit,
self.speed_time, sshkey=sshkey, cache_path=cache_path,
logger=self.logger, git_timeout=self.git_timeout,
zuul_event_id=zuul_event_id)
zuul_event_id=zuul_event_id, retry_timeout=retry_timeout)
self.repos[key] = repo
except Exception:
@ -893,6 +897,7 @@ class Merger(object):
project = source.getProject(project_name)
hostname = project.canonical_hostname
url = source.getGitUrl(project)
retry_timeout = source.getRetryTimeout(project)
key = '/'.join([hostname, project_name])
if key in self.repos:
repo = self.repos[key]
@ -906,7 +911,8 @@ class Merger(object):
(connection_name, project_name,))
return self._addProject(hostname, connection_name, project_name, url,
sshkey, zuul_event_id,
process_worker=process_worker)
process_worker=process_worker,
retry_timeout=retry_timeout)
def updateRepo(self, connection_name, project_name, repo_state=None,
zuul_event_id=None, build=None, process_worker=None):

View File

@ -142,13 +142,21 @@ class BaseSource(object, metaclass=abc.ABCMeta):
@abc.abstractmethod
def getProjectOpenChanges(self, project):
"""Get the open changes for a project."""
@abc.abstractmethod
def getGitUrl(self, project):
"""Get the git url for a project."""
def getRetryTimeout(self, project):
"""Get the retry timeout for a project in seconds.
This is used by the mergers to potentially increase the number
of git fetch retries before giving up. Return None to use the
default.
"""
return None
@abc.abstractmethod
def getProject(self, name):
"""Get a project."""