From c22ff00dece66221bb05b6a57924537d03ca7a13 Mon Sep 17 00:00:00 2001 From: Tobias Henkel Date: Thu, 4 Oct 2018 10:03:31 +0200 Subject: [PATCH] Parallelize prime_installation_map During zuul startup when using Github app mechanism zuul iterates single threaded over all installations and queries the projects of the installation. In large zuul installations with many projects and organizations in Github this blocks zuul startup for quite some time. We can easily parallelize this by using a thread pool. This brings down the time spent from 18s to 2.5s in our deployment. Change-Id: I16b940488cd0b3705c8f10b4c17fabfc77db368a --- zuul/driver/github/githubconnection.py | 60 ++++++++++++++++++-------- 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/zuul/driver/github/githubconnection.py b/zuul/driver/github/githubconnection.py index 81a35560b4..d9f3e61ffd 100644 --- a/zuul/driver/github/githubconnection.py +++ b/zuul/driver/github/githubconnection.py @@ -13,6 +13,7 @@ # under the License. import collections +import concurrent.futures import datetime import logging import hmac @@ -668,6 +669,23 @@ class GithubConnection(BaseConnection): return token + def _get_repos_of_installation(self, inst_id, headers): + url = '%s/installation/repositories?per_page=100' % self.base_url + project_names = [] + while url: + self.log.debug("Fetching repos for install %s" % inst_id) + response = requests.get(url, headers=headers) + response.raise_for_status() + repos = response.json() + + for repo in repos.get('repositories'): + project_name = repo.get('full_name') + project_names.append(project_name) + + # check if we need to do further paged calls + url = response.links.get('next', {}).get('url') + return project_names + def _prime_installation_map(self): """Walks each app install for the repos to prime install IDs""" @@ -690,27 +708,35 @@ class GithubConnection(BaseConnection): url = response.links.get( 'next', {}).get('url') - for install in installations: - inst_id = install.get('id') - token = self._get_installation_key( - project=None, inst_id=inst_id) - headers = {'Accept': PREVIEW_JSON_ACCEPT, - 'Authorization': 'token %s' % token} + headers_per_inst = {} + with concurrent.futures.ThreadPoolExecutor() as executor: - url = '%s/installation/repositories?per_page=100' % self.base_url - while url: - self.log.debug("Fetching repos for install %s" % inst_id) - response = requests.get(url, headers=headers) - response.raise_for_status() - repos = response.json() + token_by_inst = {} + for install in installations: + inst_id = install.get('id') + token_by_inst[inst_id] = executor.submit( + self._get_installation_key, project=None, inst_id=inst_id) - for repo in repos.get('repositories'): - project_name = repo.get('full_name') + for inst_id, result in token_by_inst.items(): + token = result.result() + headers_per_inst[inst_id] = { + 'Accept': PREVIEW_JSON_ACCEPT, + 'Authorization': 'token %s' % token + } + + project_names_by_inst = {} + for install in installations: + inst_id = install.get('id') + headers = headers_per_inst[inst_id] + + project_names_by_inst[inst_id] = executor.submit( + self._get_repos_of_installation, inst_id, headers) + + for inst_id, result in project_names_by_inst.items(): + project_names = result.result() + for project_name in project_names: self.installation_map[project_name] = inst_id - # check if we need to do further paged calls - url = response.links.get('next', {}).get('url') - def addEvent(self, data, event=None, delivery=None): return self.event_queue.put((time.time(), data, event, delivery))