From 0f7982fee003a4359acb699cf37ca12548cd5995 Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Fri, 11 Sep 2020 10:34:47 -0700 Subject: [PATCH] Clean up stale git index.lock files on merger startup We've noticed that if zuul executors (and presumably mergers) don't shut down gracefully that they may leak git index.lock files in the .git dirs of the merger repos. Since these repos should be dedicated to zuul's use without outside interference we can reasonably safely remove any present index.lock files when starting zuul mergers (and executors). This implementation does an os.walk under the merger repos root looking for .git dirs and once it has found them checks for any index.lock files. This happens before starting the gearman worker which should avoid any races with these resources. Change-Id: Ie043453bcdf4500a3718da6f705c882431acafdf --- tests/unit/test_merger_repo.py | 37 ++++++++++++++++++++++++++++++++++ zuul/merger/server.py | 19 +++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/tests/unit/test_merger_repo.py b/tests/unit/test_merger_repo.py index 28e4b44567..9a626ea56b 100644 --- a/tests/unit/test_merger_repo.py +++ b/tests/unit/test_merger_repo.py @@ -731,3 +731,40 @@ class TestMerger(ZuulTestCase): ref_map[foo_zuul_ref].commit.hexsha, merge_state[("gerrit", "org/project", "foo")] ) + + def test_stale_index_lock_cleanup(self): + # Stop the running executor's merger. We needed it running to merge + # things during test boostrapping but now it is just in the way. + self.executor_server.merger_gearworker.stop() + self.executor_server.merger_gearworker.join() + # Start the merger and do a merge to populate the repo on disk + self._startMerger() + + A = self.fake_gerrit.addFakeChange('org/project1', 'master', 'A') + A.addApproval('Code-Review', 2) + self.fake_gerrit.addEvent(A.addApproval('Approved', 1)) + self.waitUntilSettled() + self.assertEqual(A.data['status'], 'MERGED') + + # Stop the merger so we can modify the git repo + self.merge_server.stop() + self.merge_server.join() + + # Add an index.lock file + fpath = os.path.join(self.merger_src_root, 'review.example.com', + 'org', 'project1', '.git', 'index.lock') + with open(fpath, 'w'): + pass + self.assertTrue(os.path.exists(fpath)) + + # Start a new merger and check that we can still merge things + self._startMerger() + + # This will fail if git can't modify the repo due to a stale lock file. + B = self.fake_gerrit.addFakeChange('org/project1', 'master', 'B') + B.addApproval('Code-Review', 2) + self.fake_gerrit.addEvent(B.addApproval('Approved', 1)) + self.waitUntilSettled() + self.assertEqual(B.data['status'], 'MERGED') + + self.assertFalse(os.path.exists(fpath)) diff --git a/zuul/merger/server.py b/zuul/merger/server.py index 08951f75c7..7b2090c5ee 100644 --- a/zuul/merger/server.py +++ b/zuul/merger/server.py @@ -14,6 +14,7 @@ import json import logging +import os import threading from abc import ABCMeta @@ -103,6 +104,24 @@ class BaseMergeServer(metaclass=ABCMeta): def start(self): self.log.debug('Starting merger worker') + self.log.debug('Cleaning any stale git index.lock files') + for (dirpath, dirnames, filenames) in os.walk(self.merge_root): + if '.git' in dirnames: + # Only recurse into .git dirs + dirnames.clear() + dirnames.append('.git') + elif dirpath.endswith('/.git'): + # Recurse no further + dirnames.clear() + if 'index.lock' in filenames: + fp = os.path.join(dirpath, 'index.lock') + try: + os.unlink(fp) + self.log.debug('Removed stale git lock: %s' % fp) + except Exception: + self.log.exception( + 'Unable to remove stale git lock: ' + '%s this may result in failed merges' % fp) self.merger_gearworker.start() def stop(self):