From aa411233501681f1f26914cfb2affb4afc3765e6 Mon Sep 17 00:00:00 2001 From: Tobias Henkel Date: Wed, 27 Mar 2019 21:03:01 +0100 Subject: [PATCH] Update cached repo during job startup only if needed Currently we update each cached repo that is involved in a job during the start phase of the job. After updating we clone each of these repos into the work dir. However when running a job we already got a repo state containing all references we need. So we can check if the cached repo already has everything we need. If the repo state doesn't contain revisions we don't have yet we can skip updating the repo and thus safe a fetch. This can improve job startup times significally especially for expensive jobs with many required projects. Change-Id: I9364e438d581b068fa19c9dfc24adab60479c385 --- tests/unit/test_merger_repo.py | 49 ++++++++++++++++++++++++++++++++++ zuul/executor/server.py | 30 ++++++++++++++------- zuul/merger/merger.py | 27 ++++++++++++++++--- 3 files changed, 92 insertions(+), 14 deletions(-) diff --git a/tests/unit/test_merger_repo.py b/tests/unit/test_merger_repo.py index f6ffbe36a8..f42bc1e5b8 100644 --- a/tests/unit/test_merger_repo.py +++ b/tests/unit/test_merger_repo.py @@ -302,6 +302,55 @@ class TestMergerRepo(ZuulTestCase): "none@example.org", "User Name", "0", "0") self.assertIn(local_path, repr(repo)) + def test_update_needed(self): + parent_path = os.path.join(self.upstream_root, 'org/project1') + repo = git.Repo(parent_path) + self.create_branch('org/project1', 'stable') + + repo_state_no_update_master = { + 'refs/heads/master': repo.commit('refs/heads/master').hexsha, + } + repo_state_no_update = { + 'refs/heads/master': repo.commit('refs/heads/master').hexsha, + 'refs/heads/stable': repo.commit('refs/heads/stable').hexsha, + } + repo_state_update = { + 'refs/heads/master': repo.commit('refs/heads/master').hexsha, + 'refs/heads/stable': repo.commit('refs/heads/stable').hexsha, + 'refs/heads/test': '1234567', + } + + work_repo = Repo(parent_path, self.workspace_root, + 'none@example.org', 'User Name', '0', '0') + self.assertFalse(work_repo.isUpdateNeeded(repo_state_no_update_master)) + self.assertFalse(work_repo.isUpdateNeeded(repo_state_no_update)) + self.assertTrue(work_repo.isUpdateNeeded(repo_state_update)) + + # Get repo and update for the first time. + merger = self.executor_server.merger + merger.updateRepo('gerrit', 'org/project1') + repo = merger.getRepo('gerrit', 'org/project1') + + # Branches master and stable must exist + self.assertEqual(['master', 'stable'], repo.getBranches()) + + # Now create an additional branch in the parent repo + self.create_branch('org/project1', 'stable2') + + # Update with repo state and expect no update done + self.log.info('Calling updateRepo with repo_state_no_update') + merger.updateRepo('gerrit', 'org/project1', + repo_state=repo_state_no_update) + repo = merger.getRepo('gerrit', 'org/project1') + self.assertEqual(['master', 'stable'], repo.getBranches()) + + # Update with repo state and expect update + self.log.info('Calling updateRepo with repo_state_update') + merger.updateRepo('gerrit', 'org/project1', + repo_state=repo_state_update) + repo = merger.getRepo('gerrit', 'org/project1') + self.assertEqual(['master', 'stable', 'stable2'], repo.getBranches()) + class TestMergerWithAuthUrl(ZuulTestCase): config_file = 'zuul-github-driver.conf' diff --git a/zuul/executor/server.py b/zuul/executor/server.py index 7ee3b24844..d0ec5d883b 100644 --- a/zuul/executor/server.py +++ b/zuul/executor/server.py @@ -514,10 +514,11 @@ class JobDir(object): class UpdateTask(object): - def __init__(self, connection_name, project_name, zuul_event_id=None, - build=None): + def __init__(self, connection_name, project_name, repo_state=None, + zuul_event_id=None, build=None): self.connection_name = connection_name self.project_name = project_name + self.repo_state = repo_state self.canonical_name = None self.branches = None self.refs = None @@ -530,7 +531,8 @@ class UpdateTask(object): def __eq__(self, other): if (other and other.connection_name == self.connection_name and - other.project_name == self.project_name): + other.project_name == self.project_name and + other.repo_state == self.repo_state): return True return False @@ -827,12 +829,14 @@ class AnsibleJob(object): self.log.debug("Job root: %s" % (self.jobdir.root,)) tasks = [] projects = set() + repo_state = args['repo_state'] # Make sure all projects used by the job are updated... for project in args['projects']: self.log.debug("Updating project %s" % (project,)) tasks.append(self.executor_server.update( project['connection'], project['name'], + repo_state=repo_state, zuul_event_id=self.zuul_event_id, build=self.job.unique)) projects.add((project['connection'], project['name'])) @@ -850,7 +854,8 @@ class AnsibleJob(object): key = (repo['connection'], repo['project']) if key not in projects: tasks.append(self.executor_server.update( - *key, zuul_event_id=self.zuul_event_id, + *key, repo_state=repo_state, + zuul_event_id=self.zuul_event_id, build=self.job.unique)) projects.add(key) @@ -885,8 +890,7 @@ class AnsibleJob(object): merge_items = [i for i in args['items'] if i.get('number')] if merge_items: - item_commit = self.doMergeChanges(merger, merge_items, - args['repo_state']) + item_commit = self.doMergeChanges(merger, merge_items, repo_state) if item_commit is None: # There was a merge conflict and we have already sent # a work complete result, don't run any jobs @@ -894,7 +898,7 @@ class AnsibleJob(object): state_items = [i for i in args['items'] if not i.get('number')] if state_items: - merger.setRepoState(state_items, args['repo_state']) + merger.setRepoState(state_items, repo_state) for project in args['projects']: repo = repos[project['canonical_name']] @@ -2539,6 +2543,7 @@ class ExecutorServer(object): task.connection_name, task.project_name) self.merger.updateRepo( task.connection_name, task.project_name, + repo_state=task.repo_state, zuul_event_id=task.zuul_event_id, build=task.build) repo = self.merger.getRepo( task.connection_name, task.project_name) @@ -2556,10 +2561,15 @@ class ExecutorServer(object): finally: task.setComplete() - def update(self, connection_name, project_name, zuul_event_id=None, - build=None): + def update(self, connection_name, project_name, repo_state=None, + zuul_event_id=None, build=None): # Update a repository in the main merger - task = UpdateTask(connection_name, project_name, + + state = None + if repo_state: + state = repo_state.get(connection_name, {}).get(project_name) + + task = UpdateTask(connection_name, project_name, repo_state=state, zuul_event_id=zuul_event_id, build=build) task = self.update_queue.put(task) return task diff --git a/zuul/merger/merger.py b/zuul/merger/merger.py index 308383f74e..57c2327f9b 100644 --- a/zuul/merger/merger.py +++ b/zuul/merger/merger.py @@ -458,6 +458,17 @@ class Repo(object): self._git_fetch(repo, 'origin', zuul_event_id) self._git_fetch(repo, 'origin', zuul_event_id, tags=True) + def isUpdateNeeded(self, repo_state, zuul_event_id=None): + repo = self.createRepoObject(zuul_event_id) + for rev in repo_state.values(): + try: + repo.commit(rev) + except Exception: + # GitPython throws an error if a revision does not + # exist + return True + return False + def getFiles(self, files, dirs=[], branch=None, commit=None, zuul_event_id=None): ret = {} @@ -595,15 +606,23 @@ class Merger(object): return self._addProject(hostname, project_name, url, sshkey, zuul_event_id) - def updateRepo(self, connection_name, project_name, zuul_event_id=None, + def updateRepo(self, connection_name, project_name, repo_state=None, + zuul_event_id=None, build=None): log = get_annotated_logger(self.log, zuul_event_id, build=build) repo = self.getRepo(connection_name, project_name, zuul_event_id=zuul_event_id) try: - log.info("Updating local repository %s/%s", - connection_name, project_name) - repo.reset(zuul_event_id=zuul_event_id, build=build) + + # Check if we need an update if we got a repo_state + if repo_state and not repo.isUpdateNeeded( + repo_state, zuul_event_id=zuul_event_id): + log.info("Skipping updating local repository %s/%s", + connection_name, project_name) + else: + log.info("Updating local repository %s/%s", + connection_name, project_name) + repo.reset(zuul_event_id=zuul_event_id, build=build) except Exception: log.exception("Unable to update %s/%s", connection_name, project_name)