From d9ac4c18b1da7d74e85de3d2c989c67d5629a434 Mon Sep 17 00:00:00 2001 From: Antoine Musso Date: Thu, 24 Jul 2014 14:44:02 +0200 Subject: [PATCH] Zuul references cleaner Zuul mergers create a vast number of git references under /refs/zuul which are never garbage collected. With hundreds of thousands of references, that makes git fetch operations very slow since git uploads all references to Gerrit to synchronize the Zuul-maintained repository. On one of Wikimedia's busy repositories (mediawiki/core) we had 55000 such references and it can take up to 18 seconds for a fetch to complete. I have seen occurrences of a merge taking 2 minutes to complete. As such, this tiny script clears out references for which the commit date of the pointed commit object is older than 360 days (the default). It is not perfect since a recent reference may well point to an old object. That would be the case on repositories that are barely active. In such a case the ref will be gone despite it being recently created. A better way would be to vary Zuul references by using month/day which will let one easily garbage collect them. But I am being lazy and that would not let us clear out references using the current scheme. Example usage: zuul-clear-refs.py --verbose --dry-run --until 90 /srv/zuul/git/project Would show a list of references pointing to commits older than 90 days and output a message whenever the script would delete them. Hint about the utility in our merger documentation. 
Reference: https://phabricator.wikimedia.org/T70481 Change-Id: Id4e55f5d571ebd5e8271e516f53f8e05c1f78c1a --- doc/source/merger.rst | 14 ++++++ tools/zuul-clear-refs.py | 94 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100755 tools/zuul-clear-refs.py diff --git a/doc/source/merger.rst b/doc/source/merger.rst index e01bc8c7df..82e204b2cc 100644 --- a/doc/source/merger.rst +++ b/doc/source/merger.rst @@ -58,3 +58,17 @@ instance, a clone will produce a repository in an unpredictable state depending on what the state of Zuul's repository is when the clone happens). They are, however, suitable for automated systems that respond to Zuul triggers. + +Clearing old references +~~~~~~~~~~~~~~~~~~~~~~~ + +The references created under refs/zuul are not garbage collected. Since +git fetch sends them all to Gerrit to sync the repositories, the time +spent on merge will slightly grow over time and start being noticeable. + +To clean them you can use the ``tools/zuul-clear-refs.py`` script on +each repository. It will delete Zuul references that point to commits +for which the commit date is older than a given number of days (default +360):: + + ./tools/zuul-clear-refs.py /path/to/zuul/git/repo diff --git a/tools/zuul-clear-refs.py b/tools/zuul-clear-refs.py new file mode 100755 index 0000000000..60ce74422f --- /dev/null +++ b/tools/zuul-clear-refs.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python +# Copyright 2014-2015 Antoine "hashar" Musso +# Copyright 2014-2015 Wikimedia Foundation Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

# pylint: disable=locally-disabled, invalid-name

"""
Zuul references cleaner.

Clear up references under /refs/zuul/ by inspecting the age of the commit the
reference points to. If the commit date is older than a number of days
specificed by --until, the reference is deleted from the git repository.

Use --dry-run --verbose to finely inspect the script behavior.
"""

import argparse
import git
import logging
import time
import sys

# Reference "now" is captured once so every ref is aged against the same
# instant, whatever the duration of the run.
NOW = int(time.time())
DEFAULT_DAYS = 360
SECONDS_PER_DAY = 86400
ZUUL_REF_PREFIX = 'refs/zuul/'

log = logging.getLogger('zuul-clear-refs')


def parse_args(argv=None):
    """Parse the command line.

    :param argv: list of arguments to parse; ``None`` (the default) makes
                 argparse read ``sys.argv[1:]``.
    :returns: the argparse namespace with ``days_ago``, ``dryrun``,
              ``verbose`` and ``gitrepo`` attributes.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '--until', dest='days_ago', default=DEFAULT_DAYS, type=int,
        help='references older than this number of day will '
             'be deleted. Default: %s' % DEFAULT_DAYS)
    parser.add_argument(
        '-n', '--dry-run', dest='dryrun', action='store_true',
        help='do not delete references')
    parser.add_argument(
        '-v', '--verbose', dest='verbose', action='store_true',
        help='set log level from info to debug')
    parser.add_argument('gitrepo', help='path to a Zuul git repository')
    return parser.parse_args(argv)


def open_repo(path):
    """Open the git repository at ``path``.

    Logs an error and exits the process with status 1 when ``path`` is not
    a git repository or does not exist at all.

    :param path: filesystem path to the repository.
    :returns: the ``git.Repo`` instance.
    """
    try:
        return git.Repo(path)
    except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
        # git.Repo raises NoSuchPathError for a missing directory; treat it
        # like an invalid repository rather than dying with a traceback.
        log.error("Invalid git repo: %s", path)
        sys.exit(1)


def clear_refs(repo, days_ago, dryrun=False):
    """Delete Zuul references pointing to commits older than ``days_ago``.

    Only references whose path starts with ``ZUUL_REF_PREFIX`` are
    considered. The age is computed from the commit's committed date.

    :param repo: ``git.Repo`` to clean.
    :param days_ago: age threshold in days; strictly older refs are removed.
    :param dryrun: when true, only log what would be deleted.
    """
    for ref in repo.references:

        if not ref.path.startswith(ZUUL_REF_PREFIX):
            continue
        # Paranoia: ignore heads/tags/remotes. The exact type comparison is
        # intentional; isinstance() would presumably also match those
        # classes since GitPython derives them from Reference.
        if type(ref) is not git.refs.reference.Reference:
            continue

        try:
            commit_ts = ref.commit.committed_date
        except LookupError:
            # GitPython does not properly handle PGP signed tags
            log.exception("Error in commit: %s, ref: %s. Type: %s",
                          ref.commit, ref.path, type(ref))
            continue

        commit_age = int((NOW - commit_ts) / SECONDS_PER_DAY)  # days
        log.debug(
            "%s at %s is %3s days old",
            ref.commit,
            ref.path,
            commit_age,
        )
        if commit_age <= days_ago:
            continue

        if dryrun:
            log.info("Would delete old ref: %s (%s)", ref.path, ref.commit)
        else:
            log.info("Deleting old ref: %s (%s)", ref.path, ref.commit)
            ref.delete(repo, ref.path)


def main(argv=None):
    """Script entry point: parse arguments, set up logging, clean the refs."""
    args = parse_args(argv)

    logging.basicConfig()
    log.setLevel(logging.DEBUG if args.verbose else logging.INFO)

    repo = open_repo(args.gitrepo)
    clear_refs(repo, args.days_ago, dryrun=args.dryrun)


if __name__ == '__main__':
    main()