Improve performance of _saveRepoState
In the case of a large repository with more than 10k refs, this method currently uses an async call from GitPython to retrieve each sha1. GitPython opens a file on the filesystem for each ref. For example, with a repository with 18k tags, a merger instance takes 100% of one CPU (not threadless) for ~3 min to perform the loop. To improve this, we store all sha1s of the tags directly from a git command (for_each_ref); this method opens the packed-refs file of the repository once to extract all refs. If a ref is not in the dict, we use the fallback method `ref.object`. Change-Id: I8b52b39cb79527791a34ac98a25e7ee41c8d4956
This commit is contained in:
parent
da9df89591
commit
6b4b293311
|
@ -440,6 +440,22 @@ class Repo(object):
|
|||
repo = self.createRepoObject(zuul_event_id)
|
||||
return repo.refs
|
||||
|
||||
def getPackedRefs(self, zuul_event_id=None):
    """Return all refs reported by ``for_each_ref``, keyed by ref name.

    A single ``for_each_ref`` call on the repo's git command wrapper
    lists every ref, so callers can look up object names without
    resolving each ref individually.

    :param zuul_event_id: Passed through to ``createRepoObject``.
    :returns: A dict mapping each ref name to a dict with the keys
        ``"ref"`` (the ref name), ``"commit"`` (the object name
        printed for the ref) and ``"type"`` (the object type).
    """
    git_repo = self.createRepoObject(zuul_event_id)
    listing = git_repo.git.for_each_ref(
        '--format=%(objectname) %(objecttype) %(refname)')
    # Every well-formed line is "<objectname> <objecttype> <refname>";
    # lines that do not split into exactly three fields are ignored.
    records = (line.split(" ") for line in listing.splitlines())
    return {
        fields[2]: {
            "ref": fields[2],
            "commit": fields[0],
            "type": fields[1],
        }
        for fields in records
        if len(fields) == 3
    }
|
||||
|
||||
def setRef(self, path, hexsha, repo=None, zuul_event_id=None):
|
||||
ref_log = get_annotated_logger(
|
||||
logging.getLogger("zuul.Repo.Ref"), zuul_event_id)
|
||||
|
@ -936,6 +952,9 @@ class Merger(object):
|
|||
repo_state, recent, branches):
|
||||
projects = repo_state.setdefault(connection_name, {})
|
||||
project = projects.setdefault(project_name, {})
|
||||
|
||||
packed_refs = repo.getPackedRefs()
|
||||
|
||||
for ref in repo.getRefs():
|
||||
if ref.path.startswith('refs/zuul/'):
|
||||
continue
|
||||
|
@ -948,7 +967,11 @@ class Merger(object):
|
|||
key = (connection_name, project_name, branch)
|
||||
if key not in recent:
|
||||
recent[key] = ref.object
|
||||
project[ref.path] = ref.object.hexsha
|
||||
|
||||
if ref.path in packed_refs:
|
||||
project[ref.path] = packed_refs[ref.path]["commit"]
|
||||
else:
|
||||
project[ref.path] = ref.object.hexsha
|
||||
|
||||
def _alterRepoState(self, connection_name, project_name,
|
||||
repo_state, path, hexsha):
|
||||
|
|
Loading…
Reference in New Issue