GC git.Repo objects in merger.Repo operations.

* zuul/merger.py: Zuul's merger.Repo is holding on to repo objects that
keep many file descriptors open. Turn all git.Repo objects into method
local references so that the git.Repo objects can be garbage collected
once these methods are popped off the stack. GCing these objects frees
the file descriptors associated with that repo. This adds overhead to
repo operations as each operation must do initial setup IO.

* tests/test_scheduler.py: Assert that no instances of git.Repo are
known to the python garbage collector. Do the assertion after running a
full garbage collection run. This is a cheap check that repo objects
are being garbage collected properly at the end of each test.

Change-Id: I1d67981f32708a85af62ca622402de9fac0e1842
This commit is contained in:
Clark Boylan 2013-11-11 18:03:53 -08:00
parent 6dbbc488d2
commit 4ba48d9967
2 changed files with 47 additions and 48 deletions

View File

@ -16,6 +16,7 @@
import ConfigParser
from cStringIO import StringIO
import gc
import hashlib
import json
import logging
@ -834,6 +835,13 @@ class TestScheduler(testtools.TestCase):
def assertFinalState(self):
    """Check end-of-test invariants.

    Asserts that the gerrit change cache is empty, that no git.Repo
    instances are still tracked by the garbage collector after a full
    collection, and finally that the queues are empty.
    """
    # The change cache must have been cleared.
    self.assertEqual(len(self.gerrit._change_cache.keys()), 0)
    # Force a collection first so that only objects which are still
    # referenced show up in gc.get_objects().
    gc.collect()
    leaked = [candidate for candidate in gc.get_objects()
              if isinstance(candidate, git.Repo)]
    self.assertEqual(len(leaked), 0)
    self.assertEmptyQueues()
def shutdown(self):

View File

@ -46,103 +46,96 @@ class Repo(object):
self.log.debug("Cloning from %s to %s" % (self.remote_url,
self.local_path))
git.Repo.clone_from(self.remote_url, self.local_path)
self.repo = git.Repo(self.local_path)
repo = git.Repo(self.local_path)
if self.email:
self.repo.config_writer().set_value('user', 'email',
self.email)
repo.config_writer().set_value('user', 'email',
self.email)
if self.username:
self.repo.config_writer().set_value('user', 'name',
self.username)
self.repo.config_writer().write()
repo.config_writer().set_value('user', 'name',
self.username)
repo.config_writer().write()
self._initialized = True
def recreateRepoObject(self):
self._ensure_cloned()
self.repo = git.Repo(self.local_path)
def createRepoObject(self):
    """Return a new git.Repo object for self.local_path.

    Calls self._ensure_cloned() first, then builds the git.Repo. The
    object is returned to the caller rather than stored on self, so it
    only lives as long as the caller keeps a reference to it.

    Raises:
        Whatever self._ensure_cloned() or git.Repo() raised. The
        failure is logged and then re-raised unchanged.
    """
    try:
        self._ensure_cloned()
        repo = git.Repo(self.local_path)
    except Exception:
        self.log.exception("Unable to initialize repo for %s" %
                           self.local_path)
        # Re-raise the real error. Falling through to the return would
        # hit an unbound local and mask it with an UnboundLocalError.
        raise
    return repo
def reset(self):
self._ensure_cloned()
repo = self.createRepoObject()
self.log.debug("Resetting repository %s" % self.local_path)
self.update()
origin = self.repo.remotes.origin
origin = repo.remotes.origin
for ref in origin.refs:
if ref.remote_head == 'HEAD':
continue
self.repo.create_head(ref.remote_head, ref, force=True)
repo.create_head(ref.remote_head, ref, force=True)
# Reset to remote HEAD (usually origin/master)
self.repo.head.reference = origin.refs['HEAD']
self.repo.head.reset(index=True, working_tree=True)
self.repo.git.clean('-x', '-f', '-d')
repo.head.reference = origin.refs['HEAD']
repo.head.reset(index=True, working_tree=True)
repo.git.clean('-x', '-f', '-d')
def getBranchHead(self, branch):
return self.repo.heads[branch]
repo = self.createRepoObject()
branch_head = repo.heads[branch]
return branch_head
def checkout(self, ref):
self._ensure_cloned()
repo = self.createRepoObject()
self.log.debug("Checking out %s" % ref)
self.repo.head.reference = ref
self.repo.head.reset(index=True, working_tree=True)
repo.head.reference = ref
repo.head.reset(index=True, working_tree=True)
def cherryPick(self, ref):
self._ensure_cloned()
repo = self.createRepoObject()
self.log.debug("Cherry-picking %s" % ref)
self.fetch(ref)
self.repo.git.cherry_pick("FETCH_HEAD")
repo.git.cherry_pick("FETCH_HEAD")
def merge(self, ref, strategy=None):
self._ensure_cloned()
repo = self.createRepoObject()
args = []
if strategy:
args += ['-s', strategy]
args.append('FETCH_HEAD')
self.fetch(ref)
self.log.debug("Merging %s with args %s" % (ref, args))
self.repo.git.merge(*args)
repo.git.merge(*args)
def fetch(self, ref):
self._ensure_cloned()
repo = self.createRepoObject()
# The git.remote.fetch method may read in git progress info and
# interpret it improperly causing an AssertionError. Because the
# data was fetched properly subsequent fetches don't seem to fail.
# So try again if an AssertionError is caught.
origin = self.repo.remotes.origin
origin = repo.remotes.origin
try:
origin.fetch(ref)
except AssertionError:
origin.fetch(ref)
# If the repository is packed, and we fetch a change that is
# also entirely packed, the cache may be out of date for the
# same reason as reset() above. Avoid these problems by
# recreating the repo object.
# https://bugs.launchpad.net/zuul/+bug/1078946
self.repo = git.Repo(self.local_path)
def createZuulRef(self, ref, commit='HEAD'):
self._ensure_cloned()
repo = self.createRepoObject()
self.log.debug("CreateZuulRef %s at %s " % (ref, commit))
ref = ZuulReference.create(self.repo, ref, commit)
ref = ZuulReference.create(repo, ref, commit)
return ref.commit
def push(self, local, remote):
self._ensure_cloned()
repo = self.createRepoObject()
self.log.debug("Pushing %s:%s to %s " % (local, remote,
self.remote_url))
self.repo.remotes.origin.push('%s:%s' % (local, remote))
repo.remotes.origin.push('%s:%s' % (local, remote))
def update(self):
self._ensure_cloned()
repo = self.createRepoObject()
self.log.debug("Updating repository %s" % self.local_path)
origin = self.repo.remotes.origin
origin = repo.remotes.origin
origin.update()
# If the remote repository is repacked, the repo object's
# cache may be out of date. Specifically, it caches whether
# to check the loose or packed DB for a given SHA. Further,
# if there was no pack or loose directory to start with, the
# repo object may not even have a database for it. Avoid
# these problems by recreating the repo object.
self.repo = git.Repo(self.local_path)
class Merger(object):
@ -180,9 +173,7 @@ class Merger(object):
self.log.exception("Unable to add project %s" % project)
def getRepo(self, project):
r = self.repos.get(project, None)
r.recreateRepoObject()
return r
return self.repos.get(project, None)
def updateRepo(self, project):
repo = self.getRepo(project)