Fix multi-tenant caching of extra config files

The cache of unparsed config data holds all of the config objects
on a project-branch, for all of the tenants.  That works as long
as all of the tenants load the same config files.  With the addition
of the extra-config-paths tenant option, different tenants may now
see different configuration files for the same project-branch.
Because the cache did not account for that, the data for the last
tenant to load a project-branch is what would be cached.

To correct this, make the global unparsed config cache aware of
which paths have been searched in constructing it, and cache each
file individually.  This allows the cache to continue to be
efficient across systems with large tenants where the same repos
appear multiple times, in that each configuration object will
still be stored only once.  However, by storing them along with
their files, the unparsed config for each tenant can be created
from the cache, even if the cache contains more files than that
tenant should see.  Because the cache remembers which files have
been searched, we can determine whether the cached results from a
prior tenant are valid for a later one; if the later tenant is
meant to search more files, we can run another cat job to fetch
the larger set of files and expand the cache.

A unit test is adjusted to add a tenant which loads fewer files
before one which loads more to verify that the cache expands
appropriately when loading the second tenant.

Change-Id: I268d89dfaaaf65af2237ec852d706a2752535c4a
This commit is contained in:
James E. Blair 2019-07-03 16:25:41 -07:00
parent 34cdfe33b6
commit 5b851c14f2
4 changed files with 114 additions and 39 deletions

View File

@ -1,3 +1,13 @@
# Load everything in a different tenant first without the extra files,
# to make sure that it doesn't poison the cache.
- tenant:
name: tenant-zero
source:
gerrit:
untrusted-projects:
- org/project1
- org/project2
- tenant:
name: tenant-one
source:

View File

@ -1673,18 +1673,14 @@ class TenantParser(object):
# in-repo configuration apply only to that branch.
branches = tenant.getProjectBranches(project)
for branch in branches:
unparsed_config = abide.getUnparsedConfig(
branch_cache = abide.getUnparsedBranchCache(
project.canonical_name, branch)
if unparsed_config and not unparsed_config.load_skipped:
if branch_cache.isValidFor(tpc):
# We already have this branch cached.
continue
if not tpc.load_classes:
# If all config classes are excluded then do not
# request any getFiles jobs, but cache the lack of
# data so we know we've looked at this branch.
abide.cacheUnparsedConfig(
project.canonical_name,
branch, model.UnparsedConfig(load_skipped=True))
# request any getFiles jobs.
continue
job = self.merger.getFiles(
project.source.connection.connection_name,
@ -1698,6 +1694,7 @@ class TenantParser(object):
job.source_context = model.SourceContext(
project, branch, '', False)
jobs.append(job)
branch_cache.setValidFor(tpc)
for job in jobs:
self.log.debug("Waiting for cat job %s" % (job,))
@ -1736,18 +1733,21 @@ class TenantParser(object):
(source_context,))
incdata = self.loadProjectYAML(
job.files[fn], source_context, loading_errors)
branch_cache = abide.getUnparsedBranchCache(
source_context.project.canonical_name,
source_context.branch)
branch_cache.put(source_context.path, incdata)
unparsed_config.extend(incdata)
abide.cacheUnparsedConfig(
job.source_context.project.canonical_name,
job.source_context.branch, unparsed_config)
def _loadTenantYAML(self, abide, tenant, loading_errors):
config_projects_config = model.UnparsedConfig()
untrusted_projects_config = model.UnparsedConfig()
for project in tenant.config_projects:
unparsed_branch_config = abide.getUnparsedConfig(
branch_cache = abide.getUnparsedBranchCache(
project.canonical_name, 'master')
tpc = tenant.project_configs[project.canonical_name]
unparsed_branch_config = branch_cache.get(tpc)
if unparsed_branch_config:
unparsed_branch_config = self.filterConfigProjectYAML(
@ -1758,8 +1758,10 @@ class TenantParser(object):
for project in tenant.untrusted_projects:
branches = tenant.getProjectBranches(project)
for branch in branches:
unparsed_branch_config = abide.getUnparsedConfig(
branch_cache = abide.getUnparsedBranchCache(
project.canonical_name, branch)
tpc = tenant.project_configs[project.canonical_name]
unparsed_branch_config = branch_cache.get(tpc)
if unparsed_branch_config:
unparsed_branch_config = self.filterUntrustedProjectYAML(
unparsed_branch_config, loading_errors)
@ -2111,8 +2113,8 @@ class ConfigLoader(object):
def reloadTenant(self, abide, tenant, ansible_manager):
new_abide = model.Abide()
new_abide.tenants = abide.tenants.copy()
new_abide.unparsed_project_branch_config = \
abide.unparsed_project_branch_config
new_abide.unparsed_project_branch_cache = \
abide.unparsed_project_branch_cache
# When reloading a tenant only, use cached data if available.
new_tenant = self.tenant_parser.fromYaml(

View File

@ -3423,7 +3423,7 @@ class UnparsedAbideConfig(object):
class UnparsedConfig(object):
"""A collection of yaml lists that has not yet been parsed into objects."""
def __init__(self, load_skipped=False):
def __init__(self):
self.pragmas = []
self.pipelines = []
self.jobs = []
@ -3433,9 +3433,9 @@ class UnparsedConfig(object):
self.secrets = []
self.semaphores = []
# This indicates whether this is empty because we skipped loading
# earlier because all config items have been excluded.
self.load_skipped = load_skipped
# The list of files/dirs which this represents.
self.files_examined = set()
self.dirs_examined = set()
def copy(self, trusted=None):
# If trusted is not None, update the source context of each
@ -4304,30 +4304,93 @@ class Tenant(object):
return Attributes(name=self.name)
class UnparsedBranchCache(object):
    """Cache of unparsed configuration for a single project-branch.

    Each configuration file's unparsed data is stored individually,
    along with a record of which extra files/dirs have been searched,
    so that the same cache can serve tenants whose
    extra-config-paths settings differ.
    """

    def __init__(self):
        # True until some tenant has actually populated this cache.
        self.load_skipped = True
        # Extra config files/dirs already covered by cat jobs.
        self.extra_files_searched = set()
        self.extra_dirs_searched = set()
        # Map of file path -> unparsed config data for that file.
        self.files = {}

    def isValidFor(self, tpc):
        """Return True if this has valid cache results for the extra
        files/dirs in the tpc.
        """
        if self.load_skipped:
            return False
        needed_files = set(tpc.extra_config_files)
        needed_dirs = set(tpc.extra_config_dirs)
        return (needed_files.issubset(self.extra_files_searched)
                and needed_dirs.issubset(self.extra_dirs_searched))

    def setValidFor(self, tpc):
        # Record that the extra paths requested by this tenant
        # project config have been searched.
        self.load_skipped = False
        self.extra_files_searched.update(tpc.extra_config_files)
        self.extra_dirs_searched.update(tpc.extra_config_dirs)

    def put(self, path, config):
        # Cache the unparsed data for a single file.
        self.files[path] = config

    def get(self, tpc):
        """Assemble an UnparsedConfig from the cached files visible
        to the given tenant project config, in standard load order.
        """
        standard_dir = []   # zuul.d/* entries
        hidden_dir = []     # .zuul.d/* entries
        extra_file_hits = []  # matches of tpc.extra_config_files
        extra_dir_hits = []   # matches of tpc.extra_config_dirs
        for path in self.files:
            if path.startswith("zuul.d/"):
                standard_dir.append(path)
            if path.startswith(".zuul.d/"):
                hidden_dir.append(path)
            for ef in tpc.extra_config_files:
                if path.startswith(ef):
                    extra_file_hits.append(path)
            for ed in tpc.extra_config_dirs:
                if path.startswith(ed):
                    extra_dir_hits.append(path)
        # Extra files keep their discovery order; directory contents
        # are applied in sorted order.
        ordered = (["zuul.yaml"] + sorted(standard_dir) +
                   [".zuul.yaml"] + sorted(hidden_dir) +
                   extra_file_hits + sorted(extra_dir_hits))
        ret = UnparsedConfig()
        for path in ordered:
            data = self.files.get(path)
            if data is not None:
                ret.extend(data)
        return ret
class Abide(object):
def __init__(self):
self.tenants = OrderedDict()
# project -> branch -> UnparsedConfig
self.unparsed_project_branch_config = {}
# project -> branch -> UnparsedBranchCache
self.unparsed_project_branch_cache = {}
def cacheUnparsedConfig(self, canonical_project_name, branch, conf):
self.unparsed_project_branch_config.setdefault(
canonical_project_name, {})[branch] = conf
def hasUnparsedBranchCache(self, canonical_project_name, branch):
project_branch_cache = self.unparsed_project_branch_cache.setdefault(
canonical_project_name, {})
cache = project_branch_cache.get(branch)
if cache is None:
return False
return True
def getUnparsedConfig(self, canonical_project_name, branch):
return self.unparsed_project_branch_config.get(
canonical_project_name, {}).get(branch)
def getUnparsedBranchCache(self, canonical_project_name, branch):
project_branch_cache = self.unparsed_project_branch_cache.setdefault(
canonical_project_name, {})
cache = project_branch_cache.get(branch)
if cache is not None:
return cache
project_branch_cache[branch] = UnparsedBranchCache()
return project_branch_cache[branch]
def clearUnparsedConfigCache(self, canonical_project_name, branch=None):
if canonical_project_name in self.unparsed_project_branch_config:
project_branch_config = \
self.unparsed_project_branch_config[canonical_project_name]
def clearUnparsedBranchCache(self, canonical_project_name, branch=None):
if canonical_project_name in self.unparsed_project_branch_cache:
project_branch_cache = \
self.unparsed_project_branch_cache[canonical_project_name]
if branch in project_branch_config:
del project_branch_config[branch]
if branch in project_branch_cache:
del project_branch_cache[branch]
if len(project_branch_config) == 0 or branch is None:
del self.unparsed_project_branch_config[canonical_project_name]
if len(project_branch_cache) == 0 or branch is None:
del self.unparsed_project_branch_cache[canonical_project_name]
class JobTimeData(object):

View File

@ -729,7 +729,7 @@ class Scheduler(threading.Thread):
for (project, branch) in event.project_branches:
self.log.debug("Clearing unparsed config: %s @%s",
project.canonical_name, branch)
self.abide.clearUnparsedConfigCache(project.canonical_name,
self.abide.clearUnparsedBranchCache(project.canonical_name,
branch)
old_tenant = self.abide.tenants[event.tenant_name]
loader = configloader.ConfigLoader(
@ -1096,8 +1096,8 @@ class Scheduler(threading.Thread):
change.updatesConfig(tenant)) or
event.branch_created or
(event.branch_deleted and
self.abide.getUnparsedConfig(event.project_name,
event.branch) is not None)):
self.abide.hasUnparsedBranchCache(event.project_name,
event.branch))):
reconfigure_tenant = True
# If the driver knows the branch but we don't have a config, we
@ -1105,8 +1105,8 @@ class Scheduler(threading.Thread):
# was just configured as protected without a push in between.
if (event.branch in project.source.getProjectBranches(
project, tenant)
and self.abide.getUnparsedConfig(
project.canonical_name, event.branch) is None):
and not self.abide.hasUnparsedBranchCache(
project.canonical_name, event.branch)):
reconfigure_tenant = True
# If the branch is unprotected and unprotected branches