Add cache-dir argument to cloner

Clone repos from cache-dir if they exist, then reset their origins
to be the git base url and update from there.

Change-Id: Ic42961aa5da9e4a66524ec14758d421944403f91
This commit is contained in:
James E. Blair 2014-08-25 15:07:21 -07:00
parent f04202243e
commit 48ff4d5fe3
4 changed files with 94 additions and 3 deletions

View File

@ -75,3 +75,15 @@ cloner processes the clones in the order supplied, so you should swap the
projects::
zuul-cloner project project/plugins/plugin1
Cached repositories
-------------------
The ``--cache-dir`` option can be used to reduce network traffic by
cloning from a local repository which may not be up to date.
If the ``--cache-dir`` option is supplied, zuul-cloner will start by
cloning any projects it processes from those found in that directory.
The URL of the origin remote of the resulting clone will be reset to use
the ``git_base_url`` and then the remote will be updated so that the
repository has all the information in the upstream repository.

View File

@ -59,6 +59,64 @@ class TestCloner(ZuulTestCase):
os.path.join(self.upstream_root, project))
return repos
def test_cache_dir(self):
    """Test that repos cloned via cache_dir end up matching upstream.

    Each project is first cloned from upstream into a cache directory.
    The cloner is then run with ``cache_dir`` pointing at that directory,
    and the resulting workspace repos are checked for the expected
    commits and for an origin URL that points at upstream.
    """
    projects = ['org/project1', 'org/project2']
    cache_root = os.path.join(self.test_root, "cache")

    # Populate the cache before any changes are added upstream.
    for project in projects:
        upstream_repo_path = os.path.join(self.upstream_root, project)
        cache_repo_path = os.path.join(cache_root, project)
        git.Repo.clone_from(upstream_repo_path, cache_repo_path)

    self.worker.hold_jobs_in_build = True
    A = self.fake_gerrit.addFakeChange('org/project1', 'master', 'A')
    A.addApproval('CRVW', 2)
    self.fake_gerrit.addEvent(A.addApproval('APRV', 1))

    self.waitUntilSettled()

    self.assertEqual(1, len(self.builds), "One build is running")

    # This change is created and marked merged after the cache was
    # populated above.
    B = self.fake_gerrit.addFakeChange('org/project2', 'master', 'B')
    B.setMerged()

    upstream = self.getUpstreamRepos(projects)
    # Expected HEAD per project, indexed by build number.
    states = [
        {'org/project1': self.builds[0].parameters['ZUUL_COMMIT'],
         'org/project2': str(upstream['org/project2'].commit('master')),
         },
    ]

    for number, build in enumerate(self.builds):
        self.log.debug("Build parameters: %s", build.parameters)
        cloner = zuul.lib.cloner.Cloner(
            git_base_url=self.upstream_root,
            projects=projects,
            workspace=self.workspace_root,
            zuul_branch=build.parameters['ZUUL_BRANCH'],
            zuul_ref=build.parameters['ZUUL_REF'],
            zuul_url=self.git_root,
            cache_dir=cache_root,
        )
        cloner.execute()
        work = self.getWorkspaceRepos(projects)
        state = states[number]

        for project in projects:
            self.assertEqual(state[project],
                             str(work[project].commit('HEAD')),
                             'Project %s commit for build %s should '
                             'be correct' % (project, number))

    # The origin remote must point at upstream, not at the cache.
    work = self.getWorkspaceRepos(projects)
    upstream_repo_path = os.path.join(self.upstream_root, 'org/project1')
    self.assertEqual(work['org/project1'].remotes.origin.url,
                     upstream_repo_path,
                     'workspace repo origin should be upstream, not cache')

    self.worker.hold_jobs_in_build = False
    self.worker.release()
    self.waitUntilSettled()
def test_one_branch(self):
self.worker.hold_jobs_in_build = True

View File

@ -54,6 +54,10 @@ class Cloner(zuul.cmd.ZuulApp):
parser.add_argument('--version', dest='version', action='version',
version=self._get_version(),
help='show zuul version')
parser.add_argument('--cache-dir', dest='cache_dir',
help=('a directory that holds cached copies of '
'repos from which to make an initial clone.'
))
parser.add_argument('git_base_url',
help='reference repo to clone from')
parser.add_argument('projects', nargs='+',
@ -142,6 +146,7 @@ class Cloner(zuul.cmd.ZuulApp):
branch=self.args.branch,
clone_map_file=self.args.clone_map_file,
project_branches=project_branches,
cache_dir=self.args.cache_dir,
)
cloner.execute()

View File

@ -29,13 +29,14 @@ class Cloner(object):
def __init__(self, git_base_url, projects, workspace, zuul_branch,
zuul_ref, zuul_url, branch=None, clone_map_file=None,
project_branches=None):
project_branches=None, cache_dir=None):
self.clone_map = []
self.dests = None
self.branch = branch
self.git_url = git_base_url
self.cache_dir = cache_dir
self.projects = projects
self.workspace = workspace
self.zuul_branch = zuul_branch
@ -66,9 +67,24 @@ class Cloner(object):
self.log.info("Prepared all repositories")
def cloneUpstream(self, project, dest):
# Check for a cached git repo first
git_cache = '%s/%s' % (self.cache_dir, project)
git_upstream = '%s/%s' % (self.git_url, project)
self.log.info("Creating repo %s from upstream %s",
project, git_upstream)
if (self.cache_dir and
os.path.exists(git_cache) and
not os.path.exists(dest)):
# file:// tells git not to hard-link across repos
git_cache = 'file://%s' % git_cache
self.log.info("Creating repo %s from cache %s",
project, git_cache)
new_repo = git.Repo.clone_from(git_cache, dest)
self.log.info("Updating origin remote in repo %s to %s",
project, git_upstream)
origin = new_repo.remotes.origin.config_writer.set(
'url', git_upstream)
else:
self.log.info("Creating repo %s from upstream %s",
project, git_upstream)
repo = Repo(
remote=git_upstream,
local=dest,