Improve performance of _saveRepoState

For large repositories with more than 10k refs,
this method currently uses an async call from GitPython to retrieve each sha1.
GitPython opens a file on the filesystem for every ref.

For example, with a repository containing 18k tags,
a merger instance takes 100% of one CPU (not threadless) for ~3 min
to perform the loop.

To improve this, we store the sha1 of every tag directly from
a single git command (for_each_ref); this method opens the packed-refs
of the repository only once to extract all refs.

If a ref is not in the dict, we fall back to `ref.object`.

Also, we have scoped this new workflow to tags because
they are normally static refs.

Change-Id: I8b52b39cb79527791a34ac98a25e7ee41c8d4956
This commit is contained in:
Andy Ladjadj 2021-07-13 16:09:40 +02:00
parent da9df89591
commit bfe77b018e
1 changed files with 24 additions and 1 deletions

View File

@ -440,6 +440,22 @@ class Repo(object):
repo = self.createRepoObject(zuul_event_id)
return repo.refs
def getPackedRefs(self, zuul_event_id=None):
    """Return a mapping of every ref reported by ``git for-each-ref``.

    A single ``for_each_ref`` call is issued on the repository object
    obtained from ``createRepoObject`` and its textual output is parsed
    line by line.

    :param zuul_event_id: forwarded unchanged to ``createRepoObject``.
    :returns: dict keyed by the full ref name; each value is a dict with
        the keys ``"ref"`` (the ref name), ``"commit"`` (the
        ``%(objectname)`` field printed by git) and ``"type"`` (the
        ``%(objecttype)`` field).
    """
    git_repo = self.createRepoObject(zuul_event_id)
    # One line per ref: "<objectname> <objecttype> <refname>".
    listing = git_repo.git.for_each_ref(
        '--format=%(objectname) %(objecttype) %(refname)'
    )
    by_name = {}
    for line in listing.splitlines():
        fields = line.split(" ")
        if len(fields) != 3:
            # Not the expected three-field layout: leave this line out.
            continue
        object_name, object_type, ref_name = fields
        by_name[ref_name] = {
            "ref": ref_name,
            "commit": object_name,
            "type": object_type,
        }
    return by_name
def setRef(self, path, hexsha, repo=None, zuul_event_id=None):
ref_log = get_annotated_logger(
logging.getLogger("zuul.Repo.Ref"), zuul_event_id)
@ -936,6 +952,9 @@ class Merger(object):
repo_state, recent, branches):
projects = repo_state.setdefault(connection_name, {})
project = projects.setdefault(project_name, {})
packed_refs = repo.getPackedRefs()
for ref in repo.getRefs():
if ref.path.startswith('refs/zuul/'):
continue
@ -948,7 +967,11 @@ class Merger(object):
key = (connection_name, project_name, branch)
if key not in recent:
recent[key] = ref.object
project[ref.path] = ref.object.hexsha
if ref.path in packed_refs:
project[ref.path] = packed_refs[ref.path]["commit"]
else:
project[ref.path] = ref.object.hexsha
def _alterRepoState(self, connection_name, project_name,
repo_state, path, hexsha):