d9ac4c18b1
Zuul mergers create a vast number of git references under /refs/zuul which are never garbage collected. With hundreds of thousands of references, that makes git fetch operations very slow since git uploads all references to Gerrit to synchronize the Zuul maintained repository. On one of Wikimedia's busy repositories (mediawiki/core) we had 55000 such references and it can take up to 18 seconds for a fetch to complete. I have seen occurrences of a merge taking 2 minutes to complete. As such, this tiny script clears out references for which the commit date of the pointed commit object is older than 360 days (the default). It is not perfect since a recent reference can well point to an old object. That would be the case on repositories that are barely active. In such a case the ref will be gone despite it being recently created. A better way would be to vary Zuul references by using month/day which will let one easily garbage collect them. But I am being lazy and that would not let us clear out references using the current scheme. Example usage: zuul-clear-refs.py --verbose --dry-run --until 90 /srv/zuul/git/project Would show a list of references pointing to commit dates older than 90 days and output a message whenever the script would delete them. Hint about the utility in our merger documentation. Reference: https://phabricator.wikimedia.org/T70481 Change-Id: Id4e55f5d571ebd5e8271e516f53f8e05c1f78c1a
95 lines
3.0 KiB
Python
Executable File
95 lines
3.0 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# Copyright 2014-2015 Antoine "hashar" Musso
|
|
# Copyright 2014-2015 Wikimedia Foundation Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
# pylint: disable=locally-disabled, invalid-name
|
|
|
|
"""
|
|
Zuul references cleaner.
|
|
|
|
Clear up references under /refs/zuul/ by inspecting the age of the commit the
|
|
reference points to. If the commit date is older than a number of days
|
|
specified by --until, the reference is deleted from the git repository.
|
|
|
|
Use --dry-run --verbose to finely inspect the script behavior.
|
|
"""
|
|
|
|
import argparse
|
|
import git
|
|
import logging
|
|
import time
|
|
import sys
|
|
|
|
# Default for --until: references whose commit is older than this many days
# are deleted.
DEFAULT_DAYS = 360
# Only references whose path starts with this prefix are considered.
ZUUL_REF_PREFIX = "refs/zuul/"
# Epoch time in whole seconds, captured once at start-up so that every
# reference is aged against the same instant.
NOW = int(time.time())
|
|
|
|
# Command line interface. The module docstring doubles as the --help
# description; the raw formatter keeps its line breaks untouched.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=__doc__)

# Age threshold, in days, above which a reference gets deleted.
parser.add_argument(
    '--until',
    type=int,
    default=DEFAULT_DAYS,
    dest='days_ago',
    help=('references older than this number of day will be deleted. '
          'Default: %s' % DEFAULT_DAYS))

# Report what would be deleted without touching the repository.
parser.add_argument(
    '-n', '--dry-run',
    action='store_true',
    dest='dryrun',
    help='do not delete references')

# Switch the script logger from INFO to DEBUG.
parser.add_argument(
    '-v', '--verbose',
    action='store_true',
    dest='verbose',
    help='set log level from info to debug')

# Positional: the repository to clean up.
parser.add_argument('gitrepo', help='path to a Zuul git repository')

args = parser.parse_args()
|
|
|
|
# Install the default root handler, then pick the verbosity of the script's
# own logger: DEBUG when --verbose was given, INFO otherwise.
logging.basicConfig()
log = logging.getLogger('zuul-clear-refs')
log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
|
|
|
|
# Open the repository given on the command line. A bad path is reported as a
# single error line and a non-zero exit status instead of a traceback.
try:
    repo = git.Repo(args.gitrepo)
except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
    # InvalidGitRepositoryError: the path exists but is not a git repository.
    # NoSuchPathError: the path does not exist at all.
    log.error("Invalid git repo: %s", args.gitrepo)
    sys.exit(1)
|
|
|
|
# Number of seconds in a day, used to express commit ages in days.
SECONDS_PER_DAY = 86400

# Walk every reference of the repository and delete (or, with --dry-run, only
# report) the Zuul references whose commit is older than --until days.
for ref in repo.references:

    # Leave anything outside of the Zuul namespace alone.
    if not ref.path.startswith(ZUUL_REF_PREFIX):
        continue
    if type(ref) is not git.refs.reference.Reference:
        # Paranoia: ignore heads/tags/remotes ..
        # type() is compared exactly (rather than isinstance()) so that
        # subclasses of Reference are skipped as well.
        continue

    # Resolve the commit once and reuse it below. It is resolved inside the
    # try block and the handler never touches ref.commit again, so a lookup
    # failure skips this reference instead of raising a second time from
    # within the handler and aborting the run.
    commit = None
    try:
        commit = ref.commit
        commit_ts = commit.committed_date
    except LookupError:
        # GitPython does not properly handle PGP signed tags
        log.exception("Error in commit: %s, ref: %s. Type: %s",
                      commit, ref.path, type(ref))
        continue

    # Age in whole days, truncated.
    commit_age = int((NOW - commit_ts) / SECONDS_PER_DAY)
    log.debug("%s at %s is %3s days old", commit, ref.path, commit_age)

    # Recent enough: keep the reference.
    if commit_age <= args.days_ago:
        continue

    if args.dryrun:
        log.info("Would delete old ref: %s (%s)", ref.path, commit)
        continue

    log.info("Deleting old ref: %s (%s)", ref.path, commit)
    ref.delete(repo, ref.path)
|