diff --git a/doc/source/cloner.rst b/doc/source/cloner.rst
index bb33f828ef..2ddf0b59c9 100644
--- a/doc/source/cloner.rst
+++ b/doc/source/cloner.rst
@@ -75,3 +75,15 @@ cloner processes the clones in the order supplied, so you should swap
 the projects::
 
   zuul-cloner project project/plugins/plugin1
+
+Cached repositories
+-------------------
+
+The ``--cache-dir`` option can be used to reduce network traffic by
+cloning from a local repository which may not be up to date.
+
+If the ``--cache-dir`` option is supplied, zuul-cloner will start by
+cloning any projects it processes from those found in that directory.
+The URL of the origin remote of the resulting clone will be reset to use
+the ``git_base_url`` and then the remote will be updated so that the
+repository has all the information in the upstream repository.
diff --git a/tests/test_cloner.py b/tests/test_cloner.py
index 166c486666..ab2683d814 100644
--- a/tests/test_cloner.py
+++ b/tests/test_cloner.py
@@ -59,6 +59,64 @@ class TestCloner(ZuulTestCase):
                 os.path.join(self.upstream_root, project))
         return repos
 
+    def test_cache_dir(self):
+        projects = ['org/project1', 'org/project2']
+        cache_root = os.path.join(self.test_root, "cache")
+        for project in projects:
+            upstream_repo_path = os.path.join(self.upstream_root, project)
+            cache_repo_path = os.path.join(cache_root, project)
+            git.Repo.clone_from(upstream_repo_path, cache_repo_path)
+
+        self.worker.hold_jobs_in_build = True
+        A = self.fake_gerrit.addFakeChange('org/project1', 'master', 'A')
+        A.addApproval('CRVW', 2)
+        self.fake_gerrit.addEvent(A.addApproval('APRV', 1))
+
+        self.waitUntilSettled()
+
+        self.assertEquals(1, len(self.builds), "One build is running")
+
+        B = self.fake_gerrit.addFakeChange('org/project2', 'master', 'B')
+        B.setMerged()
+
+        upstream = self.getUpstreamRepos(projects)
+        states = [
+            {'org/project1': self.builds[0].parameters['ZUUL_COMMIT'],
+             'org/project2': str(upstream['org/project2'].commit('master')),
+             },
+        ]
+
+        for number, build in enumerate(self.builds):
+            self.log.debug("Build parameters: %s", build.parameters)
+            cloner = zuul.lib.cloner.Cloner(
+                git_base_url=self.upstream_root,
+                projects=projects,
+                workspace=self.workspace_root,
+                zuul_branch=build.parameters['ZUUL_BRANCH'],
+                zuul_ref=build.parameters['ZUUL_REF'],
+                zuul_url=self.git_root,
+                cache_dir=cache_root,
+            )
+            cloner.execute()
+            work = self.getWorkspaceRepos(projects)
+            state = states[number]
+
+            for project in projects:
+                self.assertEquals(state[project],
+                                  str(work[project].commit('HEAD')),
+                                  'Project %s commit for build %s should '
+                                  'be correct' % (project, number))
+
+        work = self.getWorkspaceRepos(projects)
+        upstream_repo_path = os.path.join(self.upstream_root, 'org/project1')
+        self.assertEquals(work['org/project1'].remotes.origin.url,
+                          upstream_repo_path,
+                          'workspace repo origin should be upstream, not cache')
+
+        self.worker.hold_jobs_in_build = False
+        self.worker.release()
+        self.waitUntilSettled()
+
     def test_one_branch(self):
         self.worker.hold_jobs_in_build = True
 
diff --git a/zuul/cmd/cloner.py b/zuul/cmd/cloner.py
index 2fcaacd299..a895f2433b 100755
--- a/zuul/cmd/cloner.py
+++ b/zuul/cmd/cloner.py
@@ -54,6 +54,10 @@ class Cloner(zuul.cmd.ZuulApp):
         parser.add_argument('--version', dest='version', action='version',
                             version=self._get_version(),
                             help='show zuul version')
+        parser.add_argument('--cache-dir', dest='cache_dir',
+                            help=('a directory that holds cached copies of '
+                                  'repos from which to make an initial clone.'
+                                  ))
         parser.add_argument('git_base_url',
                             help='reference repo to clone from')
         parser.add_argument('projects', nargs='+',
@@ -142,6 +146,7 @@ class Cloner(zuul.cmd.ZuulApp):
             branch=self.args.branch,
             clone_map_file=self.args.clone_map_file,
             project_branches=project_branches,
+            cache_dir=self.args.cache_dir,
         )
         cloner.execute()
 
diff --git a/zuul/lib/cloner.py b/zuul/lib/cloner.py
index 579b9c7e10..89ebada0eb 100644
--- a/zuul/lib/cloner.py
+++ b/zuul/lib/cloner.py
@@ -29,13 +29,14 @@ class Cloner(object):
 
     def __init__(self, git_base_url, projects, workspace, zuul_branch,
                  zuul_ref, zuul_url, branch=None, clone_map_file=None,
-                 project_branches=None):
+                 project_branches=None, cache_dir=None):
 
         self.clone_map = []
         self.dests = None
 
         self.branch = branch
         self.git_url = git_base_url
+        self.cache_dir = cache_dir
         self.projects = projects
         self.workspace = workspace
         self.zuul_branch = zuul_branch
@@ -66,9 +67,24 @@ class Cloner(object):
         self.log.info("Prepared all repositories")
 
     def cloneUpstream(self, project, dest):
+        # Check for a cached git repo first
+        git_cache = '%s/%s' % (self.cache_dir, project)
         git_upstream = '%s/%s' % (self.git_url, project)
-        self.log.info("Creating repo %s from upstream %s",
-                      project, git_upstream)
+        if (self.cache_dir and
+            os.path.exists(git_cache) and
+            not os.path.exists(dest)):
+            # file:// tells git not to hard-link across repos
+            git_cache = 'file://%s' % git_cache
+            self.log.info("Creating repo %s from cache %s",
+                          project, git_cache)
+            new_repo = git.Repo.clone_from(git_cache, dest)
+            # The fresh clone's origin is the cache; point it at upstream.
+            self.log.info("Updating origin remote in repo %s to %s",
+                          project, git_upstream)
+            new_repo.remotes.origin.config_writer.set('url', git_upstream)
+        else:
+            self.log.info("Creating repo %s from upstream %s",
+                          project, git_upstream)
         repo = Repo(
             remote=git_upstream,
             local=dest,