Prevent Git GC issue between merger and executor

Since executors can also handle merge jobs and those merges happen in
the executor's repo cache we need to protect temporary merger refs from
being garbage collected.

Because the executor's update jobs might reset the local branch heads in
between merges, we create the refs for the speculative branch state in
'refs/zuul' instead. Those refs are cleaned up when the related branch
no longer exists.

Branch names for the Zuul refs are hashed (SHA1) in order to avoid
issues with empty directories when the branch name contains slashes.
E.g. the speculative state of the master branch will be referenced by
'refs/zuul/4f26aeafdb2367620a393c973eddbe8f8b846ebd'

Change-Id: Idd2b0bd2dfeba22f3961f851f8a463bc5c9d37ff
This commit is contained in:
Simon Westphahl 2020-08-20 15:00:30 +02:00
parent 2d46288a25
commit afb896da5d
2 changed files with 63 additions and 10 deletions

View File

@ -647,7 +647,7 @@ class TestMerger(ZuulTestCase):
def test_merge_temp_refs(self):
"""
Test that the merge updates local branches in order to avoid
Test that the merge updates local zuul refs in order to avoid
garbage collection of needed objects.
"""
merger = self.executor_server.merger
@ -671,13 +671,26 @@ class TestMerger(ZuulTestCase):
# Merge A -> B -> C
result = merger.mergeChanges([item_a, item_b, item_c])
self.assertIsNotNone(result)
merge_state = result[3]
cache_repo = merger.getRepo('gerrit', 'org/project')
repo = cache_repo.createRepoObject(zuul_event_id="dummy")
# Make sure local refs are updated
self.assertIn("foo/bar", repo.refs)
self.assertEqual(repo.refs.master.commit, repo.head.commit)
# Make sure zuul refs are updated
foobar_zuul_ref = Repo.refNameToZuulRef("foo/bar")
master_zuul_ref = Repo.refNameToZuulRef("master")
ref_map = {r.path: r for r in repo.refs}
self.assertIn(foobar_zuul_ref, ref_map)
self.assertIn(master_zuul_ref, ref_map)
self.assertEqual(
ref_map[master_zuul_ref].commit.hexsha,
merge_state[("gerrit", "org/project", "master")]
)
self.assertEqual(
ref_map[foobar_zuul_ref].commit.hexsha,
merge_state[("gerrit", "org/project", "foo/bar")]
)
# Delete the remote branch so a reset cleanes up the local branch
parent_repo.delete_head('foo/bar', force=True)
@ -690,7 +703,7 @@ class TestMerger(ZuulTestCase):
Repo._cleanup_leaked_ref_dirs(parent_path, None, [])
cache_repo.reset()
self.assertNotIn("foo/bar", repo.refs)
self.assertNotIn(foobar_zuul_ref, [r.path for r in repo.refs])
# Create another head 'foo' that can't be created if the 'foo/bar'
# branch wasn't cleaned up properly
@ -703,5 +716,18 @@ class TestMerger(ZuulTestCase):
# Merge A -> B -> C
result = merger.mergeChanges([item_a, item_b, item_c])
self.assertIsNotNone(result)
self.assertIn("foo", repo.refs)
self.assertEqual(repo.refs.master.commit, repo.head.commit)
merge_state = result[3]
foo_zuul_ref = Repo.refNameToZuulRef("foo")
ref_map = {r.path: r for r in repo.refs}
self.assertIn(foo_zuul_ref, ref_map)
self.assertIn(master_zuul_ref, ref_map)
self.assertEqual(
ref_map[master_zuul_ref].commit.hexsha,
merge_state[("gerrit", "org/project", "master")]
)
self.assertEqual(
ref_map[foo_zuul_ref].commit.hexsha,
merge_state[("gerrit", "org/project", "foo")]
)

View File

@ -15,6 +15,7 @@
from contextlib import contextmanager
from urllib.parse import urlsplit, urlunsplit, urlparse
import hashlib
import logging
import os
import re
@ -320,6 +321,12 @@ class Repo(object):
messages.append("Cleaning empty ref dir %s" % root)
os.rmdir(root)
@staticmethod
def refNameToZuulRef(ref_name: str) -> str:
return "refs/zuul/{}".format(
hashlib.sha1(ref_name.encode("utf-8")).hexdigest()
)
@staticmethod
def _reset(local_path, env, log=None):
messages = []
@ -342,6 +349,9 @@ class Repo(object):
raise Exception("Couldn't detach HEAD to any existing commit")
# Delete local heads that no longer exist on the remote end
zuul_refs_to_keep = [
"refs/zuul/fetch", # ref to last FETCH_HEAD
]
remote_heads = {r.remote_head for r in origin.refs}
for ref in repo.heads:
if ref.name not in remote_heads:
@ -350,6 +360,18 @@ class Repo(object):
else:
messages.append("Delete stale local ref %s" % ref)
repo.delete_head(ref, force=True)
else:
zuul_refs_to_keep.append(Repo.refNameToZuulRef(ref.name))
# Delete local zuul refs when the related branch no longer exists
for ref in (r for r in repo.refs if r.path.startswith("refs/zuul/")):
if ref.path in zuul_refs_to_keep:
continue
if log:
log.debug("Delete stale Zuul ref %s", ref)
else:
messages.append("Delete stale Zuul ref {}".format(ref))
Repo._deleteRef(ref.path, repo)
# Note: Before git 2.13 deleting a a ref foo/bar leaves an empty
# directory foo behind that will block creating the reference foo
@ -911,10 +933,15 @@ class Merger(object):
# Store this commit as the most recent for this project-branch
recent[key] = commit
# Ensure the local head always references the most recent
# Make sure to have a local ref that points to the most recent
# (intermediate) speculative state of a branch, so commits are not
# garbage collected.
repo.setBranchHead(item["branch"], commit, zuul_event_id=zuul_event_id)
# garbage collected. The branch name is hashed to not cause any
# problems with empty directories in case of branch names containing
# slashes. In order to prevent issues with Git garbage collection
# between merger and executor jobs, we create refs in "refs/zuul"
# instead of updating local branch heads.
repo.setRef(Repo.refNameToZuulRef(item["branch"]),
commit.hexsha, zuul_event_id=zuul_event_id)
return orig_commit, commit