GC git.Repo objects in merger.Repo operations.
* zuul/merger.py: Zuul's merger.Repo is holding on to repo objects that keep many file descriptors open. Turn all git.Repo objects into method local references so that the git.Repo objects can be garbage collected once these methods are popped off the stack. GCing these objects frees the file descriptors associated with that repo. This adds overhead to repo operations as each operation must do initial setup IO. * tests/test_scheduler.py: Assert that no instances of git.Repo are known to the python garbage collector. Do the assertion after running a full garbage collection run. This is a cheap check that repo objects are being garbage collected properly at the end of each test. Change-Id: I1d67981f32708a85af62ca622402de9fac0e1842
This commit is contained in:
parent
6dbbc488d2
commit
4ba48d9967
|
@ -16,6 +16,7 @@
|
|||
|
||||
import ConfigParser
|
||||
from cStringIO import StringIO
|
||||
import gc
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
|
@ -834,6 +835,13 @@ class TestScheduler(testtools.TestCase):
|
|||
def assertFinalState(self):
|
||||
# Make sure that the change cache is cleared
|
||||
self.assertEqual(len(self.gerrit._change_cache.keys()), 0)
|
||||
# Make sure that git.Repo objects have been garbage collected.
|
||||
repos = []
|
||||
gc.collect()
|
||||
for obj in gc.get_objects():
|
||||
if isinstance(obj, git.Repo):
|
||||
repos.append(obj)
|
||||
self.assertEqual(len(repos), 0)
|
||||
self.assertEmptyQueues()
|
||||
|
||||
def shutdown(self):
|
||||
|
|
|
@ -46,103 +46,96 @@ class Repo(object):
|
|||
self.log.debug("Cloning from %s to %s" % (self.remote_url,
|
||||
self.local_path))
|
||||
git.Repo.clone_from(self.remote_url, self.local_path)
|
||||
self.repo = git.Repo(self.local_path)
|
||||
repo = git.Repo(self.local_path)
|
||||
if self.email:
|
||||
self.repo.config_writer().set_value('user', 'email',
|
||||
self.email)
|
||||
repo.config_writer().set_value('user', 'email',
|
||||
self.email)
|
||||
if self.username:
|
||||
self.repo.config_writer().set_value('user', 'name',
|
||||
self.username)
|
||||
self.repo.config_writer().write()
|
||||
repo.config_writer().set_value('user', 'name',
|
||||
self.username)
|
||||
repo.config_writer().write()
|
||||
self._initialized = True
|
||||
|
||||
def recreateRepoObject(self):
|
||||
self._ensure_cloned()
|
||||
self.repo = git.Repo(self.local_path)
|
||||
def createRepoObject(self):
|
||||
try:
|
||||
self._ensure_cloned()
|
||||
repo = git.Repo(self.local_path)
|
||||
except:
|
||||
self.log.exception("Unable to initialize repo for %s" %
|
||||
self.local_path)
|
||||
return repo
|
||||
|
||||
def reset(self):
|
||||
self._ensure_cloned()
|
||||
repo = self.createRepoObject()
|
||||
self.log.debug("Resetting repository %s" % self.local_path)
|
||||
self.update()
|
||||
origin = self.repo.remotes.origin
|
||||
origin = repo.remotes.origin
|
||||
for ref in origin.refs:
|
||||
if ref.remote_head == 'HEAD':
|
||||
continue
|
||||
self.repo.create_head(ref.remote_head, ref, force=True)
|
||||
repo.create_head(ref.remote_head, ref, force=True)
|
||||
|
||||
# Reset to remote HEAD (usually origin/master)
|
||||
self.repo.head.reference = origin.refs['HEAD']
|
||||
self.repo.head.reset(index=True, working_tree=True)
|
||||
self.repo.git.clean('-x', '-f', '-d')
|
||||
repo.head.reference = origin.refs['HEAD']
|
||||
repo.head.reset(index=True, working_tree=True)
|
||||
repo.git.clean('-x', '-f', '-d')
|
||||
|
||||
def getBranchHead(self, branch):
|
||||
return self.repo.heads[branch]
|
||||
repo = self.createRepoObject()
|
||||
branch_head = repo.heads[branch]
|
||||
return branch_head
|
||||
|
||||
def checkout(self, ref):
|
||||
self._ensure_cloned()
|
||||
repo = self.createRepoObject()
|
||||
self.log.debug("Checking out %s" % ref)
|
||||
self.repo.head.reference = ref
|
||||
self.repo.head.reset(index=True, working_tree=True)
|
||||
repo.head.reference = ref
|
||||
repo.head.reset(index=True, working_tree=True)
|
||||
|
||||
def cherryPick(self, ref):
|
||||
self._ensure_cloned()
|
||||
repo = self.createRepoObject()
|
||||
self.log.debug("Cherry-picking %s" % ref)
|
||||
self.fetch(ref)
|
||||
self.repo.git.cherry_pick("FETCH_HEAD")
|
||||
repo.git.cherry_pick("FETCH_HEAD")
|
||||
|
||||
def merge(self, ref, strategy=None):
|
||||
self._ensure_cloned()
|
||||
repo = self.createRepoObject()
|
||||
args = []
|
||||
if strategy:
|
||||
args += ['-s', strategy]
|
||||
args.append('FETCH_HEAD')
|
||||
self.fetch(ref)
|
||||
self.log.debug("Merging %s with args %s" % (ref, args))
|
||||
self.repo.git.merge(*args)
|
||||
repo.git.merge(*args)
|
||||
|
||||
def fetch(self, ref):
|
||||
self._ensure_cloned()
|
||||
repo = self.createRepoObject()
|
||||
# The git.remote.fetch method may read in git progress info and
|
||||
# interpret it improperly causing an AssertionError. Because the
|
||||
# data was fetched properly subsequent fetches don't seem to fail.
|
||||
# So try again if an AssertionError is caught.
|
||||
origin = self.repo.remotes.origin
|
||||
origin = repo.remotes.origin
|
||||
try:
|
||||
origin.fetch(ref)
|
||||
except AssertionError:
|
||||
origin.fetch(ref)
|
||||
|
||||
# If the repository is packed, and we fetch a change that is
|
||||
# also entirely packed, the cache may be out of date for the
|
||||
# same reason as reset() above. Avoid these problems by
|
||||
# recreating the repo object.
|
||||
# https://bugs.launchpad.net/zuul/+bug/1078946
|
||||
self.repo = git.Repo(self.local_path)
|
||||
|
||||
def createZuulRef(self, ref, commit='HEAD'):
|
||||
self._ensure_cloned()
|
||||
repo = self.createRepoObject()
|
||||
self.log.debug("CreateZuulRef %s at %s " % (ref, commit))
|
||||
ref = ZuulReference.create(self.repo, ref, commit)
|
||||
ref = ZuulReference.create(repo, ref, commit)
|
||||
return ref.commit
|
||||
|
||||
def push(self, local, remote):
|
||||
self._ensure_cloned()
|
||||
repo = self.createRepoObject()
|
||||
self.log.debug("Pushing %s:%s to %s " % (local, remote,
|
||||
self.remote_url))
|
||||
self.repo.remotes.origin.push('%s:%s' % (local, remote))
|
||||
repo.remotes.origin.push('%s:%s' % (local, remote))
|
||||
|
||||
def update(self):
|
||||
self._ensure_cloned()
|
||||
repo = self.createRepoObject()
|
||||
self.log.debug("Updating repository %s" % self.local_path)
|
||||
origin = self.repo.remotes.origin
|
||||
origin = repo.remotes.origin
|
||||
origin.update()
|
||||
# If the remote repository is repacked, the repo object's
|
||||
# cache may be out of date. Specifically, it caches whether
|
||||
# to check the loose or packed DB for a given SHA. Further,
|
||||
# if there was no pack or lose directory to start with, the
|
||||
# repo object may not even have a database for it. Avoid
|
||||
# these problems by recreating the repo object.
|
||||
self.repo = git.Repo(self.local_path)
|
||||
|
||||
|
||||
class Merger(object):
|
||||
|
@ -180,9 +173,7 @@ class Merger(object):
|
|||
self.log.exception("Unable to add project %s" % project)
|
||||
|
||||
def getRepo(self, project):
|
||||
r = self.repos.get(project, None)
|
||||
r.recreateRepoObject()
|
||||
return r
|
||||
return self.repos.get(project, None)
|
||||
|
||||
def updateRepo(self, project):
|
||||
repo = self.getRepo(project)
|
||||
|
|
Loading…
Reference in New Issue