Use parallel git clone

The git clones of required roles contained in ansible-role-requirements
are currently single-threaded and can take a significant time when
running scripts/bootstrap-ansible.sh. A new module, git_requirements has
been implemented in order to multithreaded this, which takes
input as provided in clone_roles in get-ansible-requirements.yml.

The result of this implementation is a sizeable speed increase (~factor
of five across the full bootstrap_ansible script) for the git clone
repos task when tested on a two core VM.

The new module requires all repo information ahead of time in order to
distribute the clones between threads. This is unlike the previous
implementation, which performed the clones in sequence and generated
the appropriate data (destination URL, default refspec etc) per loop
iteration. This information is now generated within the ansible module,
with defaults supplied at the task level.

The retry and delay parameters used for the loop with the built in ansible
git module have been re-implemented in the new module to allow force
cloning, alongside retries and time delays for failed clones.
Change-Id: I3cdb248204c5e03ef98803d366e3a19ed7f8bdd8
This commit is contained in:
Duncan Martin Walker 2019-09-30 18:04:32 +01:00 committed by Dmitriy Rabotyagov (noonedeadpunk)
parent 47e5a90a7f
commit bf10b55c9e
5 changed files with 329 additions and 15 deletions

View File

@ -0,0 +1,314 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from ansible.module_utils.basic import AnsibleModule
import git
import itertools
import multiprocessing
import os
import signal
import time
DOCUMENTATION = """
---
module: git_requirements
short_description: Module to run a multithreaded git clone
options:
repo_info:
description:
- List of repo information dictionaries containing at
a minimum a key entry "src" with the source git URL
to clone for each repo. In these dictionaries, one
can further specify:
"path" - destination clone location
"version" - git version to checkout
"refspec" - git refspec to checkout
"depth" - clone depth level
"force" - require git clone uses "--force"
default_path:
description:
Default git clone path (str) in case not
specified on an individual repo basis in
repo_info. Defaults to "master". Not
required.
default_version:
description:
Default git version (str) in case not
specified on an individual repo basis in
repo_info. Defaults to "master". Not
required.
default_refspec:
description:
Default git repo refspec (str) in case not
specified on an individual repo basis in
repo_info. Defaults to "". Not required.
default_depth:
description:
Default clone depth (int) in case not specified
on an individual repo basis. Defaults to 10.
Not required.
retries:
description:
Integer number of retries allowed in case of git
clone failure. Defaults to 1. Not required.
delay:
description:
Integer time delay (seconds) between git clone
retries in case of failure. Defaults to 0. Not
required.
force:
description:
Boolean. Apply --force flags to git clones wherever
possible. Defaults to True. Not required.
core_multiplier:
description:
Integer multiplier on the number of cores
present on the machine to use for
multithreading. For example, on a 2 core
machine, a multiplier of 4 would use 8
threads. Defaults to 4. Not required.
"""
EXAMPLES = r"""
- name: Clone repos
git_requirements:
repo_info: "[{'src':'https://github.com/ansible/',
'name': 'ansible'
'dest': '/etc/opt/ansible'}]"
"""
def init_signal():
signal.signal(signal.SIGINT, signal.SIG_IGN)
def check_out_version(repo, version, pull=False, force=False,
refspec=None, depth=10):
try:
repo.git.fetch(force=force, refspec=refspec, depth=depth)
except Exception as e:
return ["Failed to fetch %s\n%s" % (repo.working_dir, str(e))]
try:
repo.git.fetch(tags=True, force=force, refspec=refspec, depth=depth)
except Exception as e:
return ["Failed to fetch tags for %s\n%s" % (repo.working_dir, str(e))]
try:
repo.git.checkout(version, force=force)
except Exception as e:
return [
"Failed to check out version %s for %s\n%s" %
(version, repo.working_dir, str(e))]
if pull:
try:
repo.git.pull(force=force, refspec=refspec, depth=depth)
except Exception as e:
return ["Failed to pull repo %s\n%s" % (repo.working_dir, str(e))]
return []
def reset_to_version(path, version, reset_type='--hard', force=False,
refspec=None, depth=10):
"""Function to reset to a specific hash commit"""
modify_repo = git.Repo(path)
try:
modify_repo.git.fetch(force=force, refspec=refspec, depth=depth)
except Exception as e:
return ["Failed to fetch %s\n%s" % (modify_repo.working_dir, str(e))]
try:
modify_repo.git.reset(reset_type, version,
force=force, refspec=refspec)
except Exception as e:
return ["Failed to reset %s\n%s" % (modify_repo.working_dir, str(e))]
return []
def pull_wrapper(info):
role_info = info
retries = info[1]["retries"]
delay = info[1]["delay"]
for i in range(retries):
success = pull_role(role_info)
if success:
return True
else:
time.sleep(delay)
info[2].append(["Role {0} failed after {1} retries\n".format(role_info[0],
retries)])
return False
def pull_role(info):
role, config, failures = info
required_version = role["version"]
version_hash = False
if 'version' in role:
# If the version is the length of a hash then treat is as one
if len(required_version) == 40:
version_hash = True
# if repo exists
if os.path.exists(role["dest"]):
try:
repo = git.Repo(role["dest"])
except Exception:
failtxt = "Role in {0} is broken/not a git repo.".format(
role["dest"])
failtxt += "Please delete or fix it manually"
failures.append(failtxt)
return False # go to next role
repo_url = list(repo.remote().urls)[0]
if repo_url != role["src"]:
repo.remote().set_url(role["src"])
# if they want master then fetch, checkout and pull to stay at latest
# master
if required_version == "master":
fail = check_out_version(repo, required_version, pull=True,
force=config["force"],
refspec=role["refspec"],
depth=role["depth"])
# If we have a hash then reset it to
elif version_hash:
fail = reset_to_version(role["dest"],
required_version,
force=config["force"],
refspec=role["refspec"],
depth=role["depth"])
else:
# describe can fail in some cases so be careful:
try:
current_version = repo.git.describe(tags=True)
except Exception:
current_version = ""
if current_version == required_version and not config["force"]:
fail = []
pass
else:
fail = check_out_version(repo, required_version,
force=config["force"],
refspec=role["refspec"],
depth=role["depth"])
else:
try:
# If we have a hash id then treat this a little differently
if not version_hash:
git.Repo.clone_from(role["src"], role["dest"],
branch=required_version,
depth=role["depth"],
no_single_branch=True)
fail = []
else:
git.Repo.clone_from(role["src"], role["dest"],
branch='master',
no_single_branch=True,
depth=role["depth"])
fail = reset_to_version(role["dest"], required_version,
refspec=role["refspec"],
depth=role["depth"])
except Exception as e:
fail = ('Failed cloning repo %s\n%s' % (role["dest"], str(e)))
if fail == []:
return True
else:
failures.append(fail)
return False
def set_default(dictionary, key, defaults):
if key not in dictionary.keys():
dictionary[key] = defaults[key]
def main():
# Define variables
failures = multiprocessing.Manager().list()
# Data we can pass in to the module
fields = {
"repo_info": {"required": True, "type": "list"},
"default_path": {"required": True,
"type": "str"},
"default_version": {"required": False,
"type": "str",
"default": "master"},
"default_refspec": {"required": False,
"type": "str",
"default": None},
"default_depth": {"required": False,
"type": "int",
"default": 10},
"retries": {"required": False,
"type": "int",
"default": 1},
"delay": {"required": False,
"type": "int",
"default": 0},
"force": {"required": False,
"type": "bool",
"default": True},
"core_multiplier": {"required": False,
"type": "int",
"default": 4},
}
# Pull in module fields and pass into variables
module = AnsibleModule(argument_spec=fields)
git_repos = module.params['repo_info']
defaults = {
"path": module.params["default_path"],
"depth": module.params["default_depth"],
"version": module.params["default_version"],
"refspec": module.params["default_refspec"]
}
config = {
"retries": module.params["retries"],
"delay": module.params["delay"],
"force": module.params["force"],
"core_multiplier": module.params["core_multiplier"]
}
# Set up defaults
for repo in git_repos:
for key in ["path", "refspec", "version", "depth"]:
set_default(repo, key, defaults)
if "name" not in repo.keys():
repo["name"] = os.path.basename(repo["src"])
repo["dest"] = os.path.join(repo["path"], repo["name"])
# Define varibles
failures = multiprocessing.Manager().list()
core_count = multiprocessing.cpu_count() * config["core_multiplier"]
# Load up process and pass in interrupt and core process count
p = multiprocessing.Pool(core_count, init_signal)
clone_success = p.map(pull_wrapper, zip(git_repos,
itertools.repeat(config),
itertools.repeat(failures)),
chunksize=1)
p.close()
success = all(i for i in clone_success)
if success:
module.exit_json(msg=str(git_repos), changed=True)
else:
module.fail_json(msg=("Module failed"), meta=failures)
if __name__ == '__main__':
main()

View File

@ -25,3 +25,6 @@ openstacksdk>=0.14.0 # Apache-2.0
# We use this for the json_query filter
jmespath>=0.9.3 # MIT
# We use this for the parallel git clone
GitPython>=1.0.1

View File

@ -172,7 +172,7 @@ if [ -f "${ANSIBLE_ROLE_FILE}" ] && [[ -z "${SKIP_OSA_ROLE_CLONE+defined}" ]]; t
# NOTE(cloudnull): When bootstrapping we don't want ansible to interact
# with our plugins by default. This change will force
# ansible to ignore our plugins during this process.
export ANSIBLE_LIBRARY="/dev/null"
export ANSIBLE_LIBRARY="${OSA_CLONE_DIR}/playbooks/library"
export ANSIBLE_LOOKUP_PLUGINS="/dev/null"
export ANSIBLE_FILTER_PLUGINS="/dev/null"
export ANSIBLE_ACTION_PLUGINS="/dev/null"

View File

@ -116,22 +116,19 @@
set_fact:
clone_roles: "{{ clone_roles + user_roles }}"
- name: Clone git repos (with git)
git:
repo: "{{ item.src }}"
dest: "{{ item.path | default(role_path_default) }}/{{ item.name | default(item.src | basename) }}"
version: "{{ item.version | default('master') }}"
refspec: "{{ item.refspec | default(omit) }}"
depth: "{{ item.depth | default('10') }}"
update: true
- name: Clone git repos (parallel)
git_requirements:
default_path: "{{ role_path_default }}"
default_depth: 10
default_version: "master"
repo_info: "{{ clone_roles }}"
retries: "{{ git_clone_retries }}"
delay: "{{ git_clone_retry_delay }}"
force: true
with_items: "{{ clone_roles }}"
register: git_clone
until: git_clone is success
retries: "{{ git_clone_retries }}"
delay: "{{ git_clone_retry_delay }}"
core_multiplier: 4
vars:
ansible_python_interpreter: "/opt/ansible-runtime/bin/python"
required_roles: "{{ lookup('file', role_file) | from_yaml }}"
openstack_services_file: "{{ playbook_dir }}/../playbooks/defaults/repo_packages/openstack_services.yml"
role_file: "{{ playbook_dir }}/../ansible-role-requirements.yml"

View File

@ -26,7 +26,7 @@ export ANSIBLE_ROLES_PATH="${ANSIBLE_ROLES_PATH:-/etc/ansible/roles:OSA_PLAYBOOK
export ANSIBLE_COLLECTIONS_PATHS="${ANSIBLE_COLLECTIONS_PATHS:-/etc/ansible}"
export ANSIBLE_COLLECTIONS_PATH="${ANSIBLE_COLLECTIONS_PATH:-/etc/ansible}"
export ANSIBLE_LIBRARY="${ANSIBLE_LIBRARY:-/etc/ansible/roles/config_template/library:/etc/ansible/roles/plugins/library:/etc/ansible/roles/ceph-ansible/library}"
export ANSIBLE_LIBRARY="${ANSIBLE_LIBRARY:-OSA_PLAYBOOK_PATH/library:/etc/ansible/roles/config_template/library:/etc/ansible/roles/plugins/library:/etc/ansible/roles/ceph-ansible/library}"
export ANSIBLE_LOOKUP_PLUGINS="${ANSIBLE_LOOKUP_PLUGINS:-/etc/ansible/roles/plugins/lookup}"
export ANSIBLE_FILTER_PLUGINS="${ANSIBLE_FILTER_PLUGINS:-/etc/ansible/roles/plugins/filter:/etc/ansible/roles/ceph-ansible/plugins/filter}"
export ANSIBLE_ACTION_PLUGINS="${ANSIBLE_ACTION_PLUGINS:-/etc/ansible/roles/config_template/action:/etc/ansible/roles/plugins/action:/etc/ansible/roles/ceph-ansible/plugins/actions}"