Update cached repo during job startup only if needed

Currently we update each cached repo that is involved in a job during
the start phase of the job. After updating we clone each of these
repos into the work dir.  However when running a job we already got a
repo state containing all references we need. So we can check if the
cached repo already has everything we need. If the repo state doesn't
contain revisions we don't have yet we can skip updating the repo and
thus safe a fetch.

This can improve job startup times significally especially for
expensive jobs with many required projects.

Change-Id: I9364e438d581b068fa19c9dfc24adab60479c385
This commit is contained in:
Tobias Henkel 2019-03-27 21:03:01 +01:00
parent 09f349a793
commit aa41123350
No known key found for this signature in database
GPG Key ID: 03750DEC158E5FA2
3 changed files with 92 additions and 14 deletions

View File

@ -302,6 +302,55 @@ class TestMergerRepo(ZuulTestCase):
"none@example.org", "User Name", "0", "0")
self.assertIn(local_path, repr(repo))
def test_update_needed(self):
parent_path = os.path.join(self.upstream_root, 'org/project1')
repo = git.Repo(parent_path)
self.create_branch('org/project1', 'stable')
repo_state_no_update_master = {
'refs/heads/master': repo.commit('refs/heads/master').hexsha,
}
repo_state_no_update = {
'refs/heads/master': repo.commit('refs/heads/master').hexsha,
'refs/heads/stable': repo.commit('refs/heads/stable').hexsha,
}
repo_state_update = {
'refs/heads/master': repo.commit('refs/heads/master').hexsha,
'refs/heads/stable': repo.commit('refs/heads/stable').hexsha,
'refs/heads/test': '1234567',
}
work_repo = Repo(parent_path, self.workspace_root,
'none@example.org', 'User Name', '0', '0')
self.assertFalse(work_repo.isUpdateNeeded(repo_state_no_update_master))
self.assertFalse(work_repo.isUpdateNeeded(repo_state_no_update))
self.assertTrue(work_repo.isUpdateNeeded(repo_state_update))
# Get repo and update for the first time.
merger = self.executor_server.merger
merger.updateRepo('gerrit', 'org/project1')
repo = merger.getRepo('gerrit', 'org/project1')
# Branches master and stable must exist
self.assertEqual(['master', 'stable'], repo.getBranches())
# Now create an additional branch in the parent repo
self.create_branch('org/project1', 'stable2')
# Update with repo state and expect no update done
self.log.info('Calling updateRepo with repo_state_no_update')
merger.updateRepo('gerrit', 'org/project1',
repo_state=repo_state_no_update)
repo = merger.getRepo('gerrit', 'org/project1')
self.assertEqual(['master', 'stable'], repo.getBranches())
# Update with repo state and expect update
self.log.info('Calling updateRepo with repo_state_update')
merger.updateRepo('gerrit', 'org/project1',
repo_state=repo_state_update)
repo = merger.getRepo('gerrit', 'org/project1')
self.assertEqual(['master', 'stable', 'stable2'], repo.getBranches())
class TestMergerWithAuthUrl(ZuulTestCase):
config_file = 'zuul-github-driver.conf'

View File

@ -514,10 +514,11 @@ class JobDir(object):
class UpdateTask(object):
def __init__(self, connection_name, project_name, zuul_event_id=None,
build=None):
def __init__(self, connection_name, project_name, repo_state=None,
zuul_event_id=None, build=None):
self.connection_name = connection_name
self.project_name = project_name
self.repo_state = repo_state
self.canonical_name = None
self.branches = None
self.refs = None
@ -530,7 +531,8 @@ class UpdateTask(object):
def __eq__(self, other):
if (other and other.connection_name == self.connection_name and
other.project_name == self.project_name):
other.project_name == self.project_name and
other.repo_state == self.repo_state):
return True
return False
@ -827,12 +829,14 @@ class AnsibleJob(object):
self.log.debug("Job root: %s" % (self.jobdir.root,))
tasks = []
projects = set()
repo_state = args['repo_state']
# Make sure all projects used by the job are updated...
for project in args['projects']:
self.log.debug("Updating project %s" % (project,))
tasks.append(self.executor_server.update(
project['connection'], project['name'],
repo_state=repo_state,
zuul_event_id=self.zuul_event_id,
build=self.job.unique))
projects.add((project['connection'], project['name']))
@ -850,7 +854,8 @@ class AnsibleJob(object):
key = (repo['connection'], repo['project'])
if key not in projects:
tasks.append(self.executor_server.update(
*key, zuul_event_id=self.zuul_event_id,
*key, repo_state=repo_state,
zuul_event_id=self.zuul_event_id,
build=self.job.unique))
projects.add(key)
@ -885,8 +890,7 @@ class AnsibleJob(object):
merge_items = [i for i in args['items'] if i.get('number')]
if merge_items:
item_commit = self.doMergeChanges(merger, merge_items,
args['repo_state'])
item_commit = self.doMergeChanges(merger, merge_items, repo_state)
if item_commit is None:
# There was a merge conflict and we have already sent
# a work complete result, don't run any jobs
@ -894,7 +898,7 @@ class AnsibleJob(object):
state_items = [i for i in args['items'] if not i.get('number')]
if state_items:
merger.setRepoState(state_items, args['repo_state'])
merger.setRepoState(state_items, repo_state)
for project in args['projects']:
repo = repos[project['canonical_name']]
@ -2539,6 +2543,7 @@ class ExecutorServer(object):
task.connection_name, task.project_name)
self.merger.updateRepo(
task.connection_name, task.project_name,
repo_state=task.repo_state,
zuul_event_id=task.zuul_event_id, build=task.build)
repo = self.merger.getRepo(
task.connection_name, task.project_name)
@ -2556,10 +2561,15 @@ class ExecutorServer(object):
finally:
task.setComplete()
def update(self, connection_name, project_name, zuul_event_id=None,
build=None):
def update(self, connection_name, project_name, repo_state=None,
zuul_event_id=None, build=None):
# Update a repository in the main merger
task = UpdateTask(connection_name, project_name,
state = None
if repo_state:
state = repo_state.get(connection_name, {}).get(project_name)
task = UpdateTask(connection_name, project_name, repo_state=state,
zuul_event_id=zuul_event_id, build=build)
task = self.update_queue.put(task)
return task

View File

@ -458,6 +458,17 @@ class Repo(object):
self._git_fetch(repo, 'origin', zuul_event_id)
self._git_fetch(repo, 'origin', zuul_event_id, tags=True)
def isUpdateNeeded(self, repo_state, zuul_event_id=None):
repo = self.createRepoObject(zuul_event_id)
for rev in repo_state.values():
try:
repo.commit(rev)
except Exception:
# GitPython throws an error if a revision does not
# exist
return True
return False
def getFiles(self, files, dirs=[], branch=None, commit=None,
zuul_event_id=None):
ret = {}
@ -595,15 +606,23 @@ class Merger(object):
return self._addProject(hostname, project_name, url, sshkey,
zuul_event_id)
def updateRepo(self, connection_name, project_name, zuul_event_id=None,
def updateRepo(self, connection_name, project_name, repo_state=None,
zuul_event_id=None,
build=None):
log = get_annotated_logger(self.log, zuul_event_id, build=build)
repo = self.getRepo(connection_name, project_name,
zuul_event_id=zuul_event_id)
try:
log.info("Updating local repository %s/%s",
connection_name, project_name)
repo.reset(zuul_event_id=zuul_event_id, build=build)
# Check if we need an update if we got a repo_state
if repo_state and not repo.isUpdateNeeded(
repo_state, zuul_event_id=zuul_event_id):
log.info("Skipping updating local repository %s/%s",
connection_name, project_name)
else:
log.info("Updating local repository %s/%s",
connection_name, project_name)
repo.reset(zuul_event_id=zuul_event_id, build=build)
except Exception:
log.exception("Unable to update %s/%s",
connection_name, project_name)