From b17dfc13ed72b9d58b53952cc88d15343be2329d Mon Sep 17 00:00:00 2001 From: Simon Westphahl Date: Mon, 14 Nov 2022 15:18:12 +0100 Subject: [PATCH] Cleanup leaked git index.lock files on checkout When the git command crashes or is aborted due to a timeout we might end up with a leaked index.lock file in the affected repository. This has the effect that all subsequent git operations that try to create the lock will fail. Since Zuul maintains a separate lock for serializing operations on a repositotry, we can be sure that the lock file was leaked in a previous operation and can be removed safely. Unable to checkout 8a87ff7cc0d0c73ac14217b653f9773a7cfce3a7 Traceback (most recent call last): File "/opt/zuul/lib/python3.10/site-packages/zuul/merger/merger.py", line 1045, in _mergeChange repo.checkout(ref, zuul_event_id=zuul_event_id) File "/opt/zuul/lib/python3.10/site-packages/zuul/merger/merger.py", line 561, in checkout repo.head.reset(working_tree=True) File "/opt/zuul/lib/python3.10/site-packages/git/refs/head.py", line 82, in reset self.repo.git.reset(mode, commit, '--', paths, **kwargs) File "/opt/zuul/lib/python3.10/site-packages/git/cmd.py", line 542, in return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) File "/opt/zuul/lib/python3.10/site-packages/git/cmd.py", line 1005, in _call_process return self.execute(call, **exec_kwargs) File "/opt/zuul/lib/python3.10/site-packages/git/cmd.py", line 822, in execute raise GitCommandError(command, status, stderr_value, stdout_value) git.exc.GitCommandError: Cmd('git') failed due to: exit code(128) cmdline: git reset --hard HEAD -- stderr: 'fatal: Unable to create '/var/lib/zuul/merger-git/github/foo/foo%2Fbar/.git/index.lock': File exists. Another git process seems to be running in this repository, e.g. an editor opened by 'git commit'. Please make sure all processes are terminated then try again. If it still fails, a git process may have crashed in this repository earlier: remove the file manually to continue.' Change-Id: I97334383df476809c39e0d03b1af50cb59ee0cc7 --- tests/unit/test_merger_repo.py | 7 ------- zuul/merger/merger.py | 29 +++++++++++++++++++++-------- zuul/merger/server.py | 19 ------------------- 3 files changed, 21 insertions(+), 34 deletions(-) diff --git a/tests/unit/test_merger_repo.py b/tests/unit/test_merger_repo.py index 182dea1201..fd78726ab3 100644 --- a/tests/unit/test_merger_repo.py +++ b/tests/unit/test_merger_repo.py @@ -960,10 +960,6 @@ class TestMerger(ZuulTestCase): self.waitUntilSettled() self.assertEqual(A.data['status'], 'MERGED') - # Stop the merger so we can modify the git repo - self.merge_server.stop() - self.merge_server.join() - # Add an index.lock file fpath = os.path.join(self.merger_src_root, 'review.example.com', 'org', 'org%2Fproject1', '.git', 'index.lock') @@ -971,9 +967,6 @@ class TestMerger(ZuulTestCase): pass self.assertTrue(os.path.exists(fpath)) - # Start a new merger and check that we can still merge things - self._startMerger() - # This will fail if git can't modify the repo due to a stale lock file. B = self.fake_gerrit.addFakeChange('org/project1', 'master', 'B') B.addApproval('Code-Review', 2) diff --git a/zuul/merger/merger.py b/zuul/merger/merger.py index 09968eac6c..d400c253bb 100644 --- a/zuul/merger/merger.py +++ b/zuul/merger/merger.py @@ -546,16 +546,29 @@ class Repo(object): log.debug("Repo is already at %s" % ref) else: log.debug("Checking out %s" % ref) - # Perform a hard reset to the correct ref before checking out so - # that we clean up anything that might be left over from a merge - # while still only preparing the working copy once. - repo.head.reference = ref - repo.head.reset(working_tree=True) - repo.git.clean('-x', '-f', '-d') - repo.git.checkout(ref) - + try: + self._checkout(repo, ref) + except Exception: + lock_path = f"{self.local_path}/.git/index.lock" + if os.path.isfile(lock_path): + log.warning("Deleting stale index.lock file: %s", + lock_path) + os.unlink(lock_path) + # Retry the checkout + self._checkout(repo, ref) + else: + raise return repo.head.commit + def _checkout(self, repo, ref): + # Perform a hard reset to the correct ref before checking out so + # that we clean up anything that might be left over from a merge + # while still only preparing the working copy once. + repo.head.reference = ref + repo.head.reset(working_tree=True) + repo.git.clean('-x', '-f', '-d') + repo.git.checkout(ref) + def cherryPick(self, ref, zuul_event_id=None): log = get_annotated_logger(self.log, zuul_event_id) repo = self.createRepoObject(zuul_event_id) diff --git a/zuul/merger/server.py b/zuul/merger/server.py index a55cec3baa..b843c8f3e4 100644 --- a/zuul/merger/server.py +++ b/zuul/merger/server.py @@ -15,7 +15,6 @@ import json import logging -import os import socket import sys import threading @@ -156,24 +155,6 @@ class BaseMergeServer(metaclass=ABCMeta): def start(self): self.log.debug('Starting merger') - self.log.debug('Cleaning any stale git index.lock files') - for (dirpath, dirnames, filenames) in os.walk(self.merge_root): - if '.git' in dirnames: - # Only recurse into .git dirs - dirnames.clear() - dirnames.append('.git') - elif dirpath.endswith('/.git'): - # Recurse no further - dirnames.clear() - if 'index.lock' in filenames: - fp = os.path.join(dirpath, 'index.lock') - try: - os.unlink(fp) - self.log.debug('Removed stale git lock: %s' % fp) - except Exception: - self.log.exception( - 'Unable to remove stale git lock: ' - '%s this may result in failed merges' % fp) self._merger_running = True self.merger_thread.start()