Merge "Parallelize config cache loading"
This commit is contained in:
commit
f2d4ff276b
|
@ -315,6 +315,9 @@ class TestOfflineZKOperations(ZuulTestCase):
|
|||
def assertFinalState(self):
|
||||
pass
|
||||
|
||||
def assertCleanShutdown(self):
|
||||
pass
|
||||
|
||||
def test_delete_state(self):
|
||||
# Shut everything down (as much as possible) to reduce
|
||||
# logspam and errors.
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
import collections
|
||||
from contextlib import contextmanager
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
import copy
|
||||
import itertools
|
||||
import os
|
||||
|
@ -1608,7 +1609,7 @@ class TenantParser(object):
|
|||
}
|
||||
return vs.Schema(tenant)
|
||||
|
||||
def fromYaml(self, abide, conf, ansible_manager, min_ltimes=None,
|
||||
def fromYaml(self, abide, conf, ansible_manager, executor, min_ltimes=None,
|
||||
layout_uuid=None, branch_cache_min_ltimes=None,
|
||||
ignore_cat_exception=True):
|
||||
# Note: This vs schema validation is not necessary in most cases as we
|
||||
|
@ -1659,7 +1660,15 @@ class TenantParser(object):
|
|||
# We prepare a stack to store config loading issues
|
||||
loading_errors = model.LoadingErrors()
|
||||
|
||||
# Get branches in parallel
|
||||
branch_futures = {}
|
||||
for tpc in config_tpcs + untrusted_tpcs:
|
||||
future = executor.submit(self._getProjectBranches,
|
||||
tenant, tpc, branch_cache_min_ltimes)
|
||||
branch_futures[future] = tpc
|
||||
|
||||
for branch_future in as_completed(branch_futures.keys()):
|
||||
tpc = branch_futures[branch_future]
|
||||
source_context = model.ProjectContext(
|
||||
tpc.project.canonical_name, tpc.project.name)
|
||||
with project_configuration_exceptions(source_context,
|
||||
|
@ -1682,7 +1691,7 @@ class TenantParser(object):
|
|||
# already cached. Full reconfigurations start with an empty
|
||||
# cache.
|
||||
self._cacheTenantYAML(abide, tenant, loading_errors, min_ltimes,
|
||||
ignore_cat_exception)
|
||||
executor, ignore_cat_exception)
|
||||
|
||||
# Then collect the appropriate YAML based on this tenant
|
||||
# config.
|
||||
|
@ -1879,7 +1888,7 @@ class TenantParser(object):
|
|||
raise Exception("Unable to parse project %s", conf)
|
||||
return projects
|
||||
|
||||
def loadTenantProjects(self, conf_tenant):
|
||||
def loadTenantProjects(self, conf_tenant, executor):
|
||||
config_projects = []
|
||||
untrusted_projects = []
|
||||
|
||||
|
@ -1887,6 +1896,7 @@ class TenantParser(object):
|
|||
'secret', 'project-template', 'nodeset',
|
||||
'queue'])
|
||||
|
||||
futures = []
|
||||
for source_name, conf_source in conf_tenant.get('source', {}).items():
|
||||
source = self.connections.getSource(source_name)
|
||||
|
||||
|
@ -1895,7 +1905,8 @@ class TenantParser(object):
|
|||
# tpcs = TenantProjectConfigs
|
||||
tpcs = self._getProjects(source, conf_repo, current_include)
|
||||
for tpc in tpcs:
|
||||
self._loadProjectKeys(source_name, tpc.project)
|
||||
futures.append(executor.submit(
|
||||
self._loadProjectKeys, source_name, tpc.project))
|
||||
config_projects.append(tpc)
|
||||
|
||||
current_include = frozenset(default_include - set(['pipeline']))
|
||||
|
@ -1903,13 +1914,16 @@ class TenantParser(object):
|
|||
tpcs = self._getProjects(source, conf_repo,
|
||||
current_include)
|
||||
for tpc in tpcs:
|
||||
self._loadProjectKeys(source_name, tpc.project)
|
||||
futures.append(executor.submit(
|
||||
self._loadProjectKeys, source_name, tpc.project))
|
||||
untrusted_projects.append(tpc)
|
||||
|
||||
for f in futures:
|
||||
f.result()
|
||||
return config_projects, untrusted_projects
|
||||
|
||||
def _cacheTenantYAML(self, abide, tenant, loading_errors, min_ltimes,
|
||||
ignore_cat_exception=True):
|
||||
executor, ignore_cat_exception=True):
|
||||
# min_ltimes can be the following: None (that means that we
|
||||
# should not use the file cache at all) or a nested dict of
|
||||
# project and branch to ltime. A value of None usually means
|
||||
|
@ -1973,6 +1987,7 @@ class TenantParser(object):
|
|||
|
||||
jobs = []
|
||||
|
||||
futures = []
|
||||
for project in itertools.chain(
|
||||
tenant.config_projects, tenant.untrusted_projects):
|
||||
tpc = tenant.project_configs[project.canonical_name]
|
||||
|
@ -1986,7 +2001,35 @@ class TenantParser(object):
|
|||
# If all config classes are excluded then do not
|
||||
# request any getFiles jobs.
|
||||
continue
|
||||
futures.append(executor.submit(self._cacheTenantYAMLBranch,
|
||||
abide, tenant, loading_errors,
|
||||
min_ltimes, tpc, project,
|
||||
branch, jobs))
|
||||
for future in futures:
|
||||
future.result()
|
||||
|
||||
try:
|
||||
self._processCatJobs(abide, tenant, loading_errors, jobs,
|
||||
min_ltimes)
|
||||
except Exception:
|
||||
self.log.exception("Error processing cat jobs, canceling")
|
||||
for job in jobs:
|
||||
try:
|
||||
self.log.debug("Canceling cat job %s", job)
|
||||
self.merger.cancel(job)
|
||||
except Exception:
|
||||
self.log.exception("Unable to cancel job %s", job)
|
||||
if not ignore_cat_exception:
|
||||
raise
|
||||
if not ignore_cat_exception:
|
||||
raise
|
||||
|
||||
def _cacheTenantYAMLBranch(self, abide, tenant, loading_errors, min_ltimes,
|
||||
tpc, project, branch, jobs):
|
||||
# This is the middle section of _cacheTenantYAML, called for
|
||||
# each project-branch. It's a separate method so we can
|
||||
# execute it in parallel. The "jobs" argument is mutated and
|
||||
# accumulates a list of all merger jobs submitted.
|
||||
source_context = model.SourceContext(
|
||||
project.canonical_name, project.name,
|
||||
project.connection_name, branch, '', False)
|
||||
|
@ -2007,7 +2050,7 @@ class TenantParser(object):
|
|||
if branch_cache.isValidFor(tpc, pb_ltime):
|
||||
min_ltimes[project.canonical_name][branch] =\
|
||||
branch_cache.ltime
|
||||
continue
|
||||
return
|
||||
|
||||
with self.unparsed_config_cache.readLock(
|
||||
project.canonical_name):
|
||||
|
@ -2021,7 +2064,7 @@ class TenantParser(object):
|
|||
abide, tenant, source_context, files_cache,
|
||||
loading_errors, files_cache.ltime,
|
||||
min_ltimes)
|
||||
continue
|
||||
return
|
||||
|
||||
extra_config_files = abide.getExtraConfigFiles(project.name)
|
||||
extra_config_dirs = abide.getExtraConfigDirs(project.name)
|
||||
|
@ -2031,7 +2074,7 @@ class TenantParser(object):
|
|||
raise Exception(
|
||||
"Configuration files missing from cache. "
|
||||
"Check Zuul scheduler logs for more information.")
|
||||
continue
|
||||
return
|
||||
ltime = self.zk_client.getCurrentLtime()
|
||||
job = self.merger.getFiles(
|
||||
project.source.connection.connection_name,
|
||||
|
@ -2047,23 +2090,10 @@ class TenantParser(object):
|
|||
job.ltime = ltime
|
||||
job.source_context = source_context
|
||||
jobs.append(job)
|
||||
try:
|
||||
self._processCatJobs(abide, tenant, loading_errors, jobs,
|
||||
min_ltimes)
|
||||
except Exception:
|
||||
self.log.exception("Error processing cat jobs, canceling")
|
||||
for job in jobs:
|
||||
try:
|
||||
self.log.debug("Canceling cat job %s", job)
|
||||
self.merger.cancel(job)
|
||||
except Exception:
|
||||
self.log.exception("Unable to cancel job %s", job)
|
||||
if not ignore_cat_exception:
|
||||
raise
|
||||
if not ignore_cat_exception:
|
||||
raise
|
||||
|
||||
def _processCatJobs(self, abide, tenant, loading_errors, jobs, min_ltimes):
|
||||
# Called at the end of _cacheTenantYAML after all cat jobs
|
||||
# have been submitted
|
||||
for job in jobs:
|
||||
self.log.debug("Waiting for cat job %s" % (job,))
|
||||
res = job.wait(self.merger.git_timeout)
|
||||
|
@ -2545,9 +2575,11 @@ class ConfigLoader(object):
|
|||
|
||||
# Pre-load TenantProjectConfigs so we can get and cache all of a
|
||||
# project's config files (incl. tenant specific extra config) at once.
|
||||
with ThreadPoolExecutor(max_workers=4) as executor:
|
||||
for tenant_name, unparsed_config in tenants_to_load.items():
|
||||
config_tpcs, untrusted_tpcs = (
|
||||
self.tenant_parser.loadTenantProjects(unparsed_config)
|
||||
self.tenant_parser.loadTenantProjects(unparsed_config,
|
||||
executor)
|
||||
)
|
||||
abide.clearTPCs(tenant_name)
|
||||
for tpc in config_tpcs:
|
||||
|
@ -2632,9 +2664,11 @@ class ConfigLoader(object):
|
|||
return None
|
||||
|
||||
unparsed_config = unparsed_abide.tenants[tenant_name]
|
||||
with ThreadPoolExecutor(max_workers=4) as executor:
|
||||
new_tenant = self.tenant_parser.fromYaml(
|
||||
abide, unparsed_config, ansible_manager, min_ltimes, layout_uuid,
|
||||
branch_cache_min_ltimes, ignore_cat_exception)
|
||||
abide, unparsed_config, ansible_manager, executor,
|
||||
min_ltimes, layout_uuid, branch_cache_min_ltimes,
|
||||
ignore_cat_exception)
|
||||
# Copy tenants dictionary to not break concurrent iterations.
|
||||
tenants = abide.tenants.copy()
|
||||
tenants[tenant_name] = new_tenant
|
||||
|
|
|
@ -17,7 +17,7 @@ from threading import Thread
|
|||
from typing import List, Callable
|
||||
|
||||
from kazoo.client import KazooClient
|
||||
from kazoo.exceptions import NoNodeError
|
||||
from kazoo.exceptions import NoNodeError, NodeExistsError
|
||||
from kazoo.handlers.threading import KazooTimeoutError
|
||||
from kazoo.protocol.states import KazooState
|
||||
|
||||
|
@ -211,8 +211,11 @@ class ZooKeeperClient(object):
|
|||
try:
|
||||
zstat = self.client.set("/zuul/ltime", b"")
|
||||
except NoNodeError:
|
||||
try:
|
||||
self.client.create("/zuul/ltime", b"", makepath=True)
|
||||
zstat = self.client.set("/zuul/ltime", b"")
|
||||
except NodeExistsError:
|
||||
zstat = self.client.set("/zuul/ltime", b"")
|
||||
return zstat.last_modified_transaction_id
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue