Merge "Revert "Revert "Tune automatic garbage collection of git repos"""

This commit is contained in:
Zuul 2020-07-27 12:28:49 +00:00 committed by Gerrit Code Review
commit b1590dfb0e
2 changed files with 75 additions and 1 deletions

View File

@ -437,6 +437,41 @@ class TestMergerRepo(ZuulTestCase):
self.assertEqual(['master', 'stable', 'stable2', 'stable3'],
repo.getBranches())
def test_garbage_collect(self):
'''Tests that git gc doesn't prune FETCH_HEAD

Creates a commit in the upstream repo and points a change ref at it,
fetches that ref into a work repo, runs ``git gc --prune=now`` on the
work repo and then checks that FETCH_HEAD still resolves to a commit.
'''
# Location of the upstream repo for org/project1.
parent_path = os.path.join(self.upstream_root, 'org/project1')
repo = git.Repo(parent_path)
change_ref = 'refs/changes/1/1'
self.log.info('Creating a commit on %s', change_ref)
# Point HEAD directly at its current commit.
# NOTE(review): presumably this detaches HEAD so the commit created
# below does not advance any branch -- confirm against GitPython docs.
repo.head.reference = repo.head.commit
# Mapping of file name -> content to append for the fake commit.
files = {"README": "creating fake commit\n"}
for name, content in files.items():
file_name = os.path.join(parent_path, name)
# Opened in append mode, so existing file content is kept.
with open(file_name, 'a') as f:
f.write(content)
repo.index.add([file_name])
# Commit the staged changes and make change_ref point at the new commit.
commit = repo.index.commit('Test commit')
ref = git.refs.Reference(repo, change_ref)
ref.set_commit(commit)
self.log.info('Cloning parent repo')
# Work repo built from the upstream path inside the test workspace.
work_repo = Repo(parent_path, self.workspace_root,
'none@example.org', 'User Name', '0', '0')
self.log.info('Fetch %s', change_ref)
work_repo.fetch(change_ref)
self.log.info('Checkout master and run garbage collection')
work_repo_object = work_repo.createRepoObject(None)
work_repo.checkout('master')
# Run gc with immediate pruning; the fetched commit must survive this.
result = work_repo_object.git.gc('--prune=now')
self.log.info(result)
self.log.info('Dereferencing FETCH_HEAD')
# Resolve FETCH_HEAD in the work repo after gc has run.
# NOTE(review): presumably this lookup raises if the object was pruned,
# so the assertion below is a secondary check -- confirm.
commit = work_repo_object.commit('FETCH_HEAD')
self.assertIsNotNone(commit)
class TestMergerWithAuthUrl(ZuulTestCase):
config_file = 'zuul-github-driver.conf'

View File

@ -189,6 +189,42 @@ class Repo(object):
config_writer.set_value('user', 'email', self.email)
if self.username:
config_writer.set_value('user', 'name', self.username)
# By default automatic garbage collection in git runs
# asynchronously in the background. This can lead to broken repos
# caused by a race in the following scenario:
# 1. git fetch (eventually triggers async gc)
# 2. zuul deletes all refs as part of reset
# 3. git gc looks for unreachable objects
# 4. zuul re-creates all refs as part of reset
# 5. git gc deletes unreachable objects it found
# Result is a repo with refs pointing to non-existent objects.
# To prevent this race autoDetach can be disabled so git fetch
# returns after the gc finished.
config_writer.set_value('gc', 'autoDetach', 'false')
# Lower the threshold of how many loose objects can trigger
# automatic garbage collection. With the default value of 6700
# we observed that with some repos automatic garbage collection
# simply refused to do its job because it refuses to prune if the
# number of unreachable objects it needs to prune exceeds a certain
# threshold. Thus lower the threshold to trigger automatic garbage
# collection more often.
config_writer.set_value('gc', 'auto', '512')
# By default garbage collection keeps unreachable objects for two
# weeks. However we don't need to carry around any unreachable
# objects so just prune them all when gc kicks in.
config_writer.set_value('gc', 'pruneExpire', 'now')
# By default git keeps a reflog of each branch for 90 days. Objects
# that are reachable from a reflog entry are not considered
# unreachable and thus won't be pruned for 90 days. This can blow up
# the repo significantly over time. Since the reflog is only really
# useful for humans working with repos we can just drop all the
# reflog when gc kicks in.
config_writer.set_value('gc', 'reflogExpire', 'now')
config_writer.write()
if rewrite_url:
self._git_set_remote_url(repo, self.remote_url)
@ -220,7 +256,10 @@ class Repo(object):
for attempt in range(1, self.retry_attempts + 1):
try:
with timeout_handler(self.local_path):
repo.git.fetch(remote, ref,
ref_to_fetch = ref
if ref_to_fetch:
ref_to_fetch += ':refs/zuul/fetch'
repo.git.fetch(remote, ref_to_fetch,
kill_after_timeout=self.git_timeout, f=True,
**kwargs)
break