Fix multi-tenant caching of extra config files
The cache of unparsed config data holds all of the config objects on a project-branch, for all of the tenants. That works as long as all of the tenants load the same config files. With the addition of the extra-config-paths tenant option, different tenants may now see different configuration files for the same project-branch. Because the cache did not account for that, the data for the last tenant to load a project-branch is what would be cached. To correct this, make the global unparsed config cache aware of which paths have been searched in constructing it, and cache each file individually. This allows the cache to continue to be efficient across systems with large tenants where the same repos appear multiple times, in that each configuration object will still be stored only once. However, by storing them along with their files, the unparsed config for each tenant can be created from the cache, even if the cache contains more files than that tenant should see. Because the cache remembers which files have been searched, we can determine whether the cache results from a prior tenant are valid for a later one, or if the later one is meant to search more files, we can run another cat job and fetch the larger set of files and expand the cache. A unit test is adjusted to add a tenant which loads fewer files before one which loads more to verify that the cache expands appropriately when loading the second tenant. Change-Id: I268d89dfaaaf65af2237ec852d706a2752535c4a
This commit is contained in:
parent
34cdfe33b6
commit
5b851c14f2
10
tests/fixtures/config/tenant-parser/extra.yaml
vendored
10
tests/fixtures/config/tenant-parser/extra.yaml
vendored
@ -1,3 +1,13 @@
|
||||
# Load everything in a different tenant first without the extra files,
|
||||
# to make sure that it doesn't poison the cache.
|
||||
- tenant:
|
||||
name: tenant-zero
|
||||
source:
|
||||
gerrit:
|
||||
untrusted-projects:
|
||||
- org/project1
|
||||
- org/project2
|
||||
|
||||
- tenant:
|
||||
name: tenant-one
|
||||
source:
|
||||
|
@ -1673,18 +1673,14 @@ class TenantParser(object):
|
||||
# in-repo configuration apply only to that branch.
|
||||
branches = tenant.getProjectBranches(project)
|
||||
for branch in branches:
|
||||
unparsed_config = abide.getUnparsedConfig(
|
||||
branch_cache = abide.getUnparsedBranchCache(
|
||||
project.canonical_name, branch)
|
||||
if unparsed_config and not unparsed_config.load_skipped:
|
||||
if branch_cache.isValidFor(tpc):
|
||||
# We already have this branch cached.
|
||||
continue
|
||||
if not tpc.load_classes:
|
||||
# If all config classes are excluded then do not
|
||||
# request any getFiles jobs, but cache the lack of
|
||||
# data so we know we've looked at this branch.
|
||||
abide.cacheUnparsedConfig(
|
||||
project.canonical_name,
|
||||
branch, model.UnparsedConfig(load_skipped=True))
|
||||
# request any getFiles jobs.
|
||||
continue
|
||||
job = self.merger.getFiles(
|
||||
project.source.connection.connection_name,
|
||||
@ -1698,6 +1694,7 @@ class TenantParser(object):
|
||||
job.source_context = model.SourceContext(
|
||||
project, branch, '', False)
|
||||
jobs.append(job)
|
||||
branch_cache.setValidFor(tpc)
|
||||
|
||||
for job in jobs:
|
||||
self.log.debug("Waiting for cat job %s" % (job,))
|
||||
@ -1736,18 +1733,21 @@ class TenantParser(object):
|
||||
(source_context,))
|
||||
incdata = self.loadProjectYAML(
|
||||
job.files[fn], source_context, loading_errors)
|
||||
branch_cache = abide.getUnparsedBranchCache(
|
||||
source_context.project.canonical_name,
|
||||
source_context.branch)
|
||||
branch_cache.put(source_context.path, incdata)
|
||||
unparsed_config.extend(incdata)
|
||||
abide.cacheUnparsedConfig(
|
||||
job.source_context.project.canonical_name,
|
||||
job.source_context.branch, unparsed_config)
|
||||
|
||||
def _loadTenantYAML(self, abide, tenant, loading_errors):
|
||||
config_projects_config = model.UnparsedConfig()
|
||||
untrusted_projects_config = model.UnparsedConfig()
|
||||
|
||||
for project in tenant.config_projects:
|
||||
unparsed_branch_config = abide.getUnparsedConfig(
|
||||
branch_cache = abide.getUnparsedBranchCache(
|
||||
project.canonical_name, 'master')
|
||||
tpc = tenant.project_configs[project.canonical_name]
|
||||
unparsed_branch_config = branch_cache.get(tpc)
|
||||
|
||||
if unparsed_branch_config:
|
||||
unparsed_branch_config = self.filterConfigProjectYAML(
|
||||
@ -1758,8 +1758,10 @@ class TenantParser(object):
|
||||
for project in tenant.untrusted_projects:
|
||||
branches = tenant.getProjectBranches(project)
|
||||
for branch in branches:
|
||||
unparsed_branch_config = abide.getUnparsedConfig(
|
||||
branch_cache = abide.getUnparsedBranchCache(
|
||||
project.canonical_name, branch)
|
||||
tpc = tenant.project_configs[project.canonical_name]
|
||||
unparsed_branch_config = branch_cache.get(tpc)
|
||||
if unparsed_branch_config:
|
||||
unparsed_branch_config = self.filterUntrustedProjectYAML(
|
||||
unparsed_branch_config, loading_errors)
|
||||
@ -2111,8 +2113,8 @@ class ConfigLoader(object):
|
||||
def reloadTenant(self, abide, tenant, ansible_manager):
|
||||
new_abide = model.Abide()
|
||||
new_abide.tenants = abide.tenants.copy()
|
||||
new_abide.unparsed_project_branch_config = \
|
||||
abide.unparsed_project_branch_config
|
||||
new_abide.unparsed_project_branch_cache = \
|
||||
abide.unparsed_project_branch_cache
|
||||
|
||||
# When reloading a tenant only, use cached data if available.
|
||||
new_tenant = self.tenant_parser.fromYaml(
|
||||
|
103
zuul/model.py
103
zuul/model.py
@ -3423,7 +3423,7 @@ class UnparsedAbideConfig(object):
|
||||
class UnparsedConfig(object):
|
||||
"""A collection of yaml lists that has not yet been parsed into objects."""
|
||||
|
||||
def __init__(self, load_skipped=False):
|
||||
def __init__(self):
|
||||
self.pragmas = []
|
||||
self.pipelines = []
|
||||
self.jobs = []
|
||||
@ -3433,9 +3433,9 @@ class UnparsedConfig(object):
|
||||
self.secrets = []
|
||||
self.semaphores = []
|
||||
|
||||
# This indicates whether this is empty because we skipped loading
|
||||
# earlier because all config items have been excluded.
|
||||
self.load_skipped = load_skipped
|
||||
# The list of files/dirs which this represents.
|
||||
self.files_examined = set()
|
||||
self.dirs_examined = set()
|
||||
|
||||
def copy(self, trusted=None):
|
||||
# If trusted is not None, update the source context of each
|
||||
@ -4304,30 +4304,93 @@ class Tenant(object):
|
||||
return Attributes(name=self.name)
|
||||
|
||||
|
||||
class UnparsedBranchCache(object):
    """Cache the unparsed config files of a single project-branch.

    Config data is stored per file path so that one cache can serve
    tenants which search different sets of extra config files/dirs:
    each tenant reassembles only the files it is meant to see.
    """
    def __init__(self):
        # True until setValidFor() records a completed load; an
        # unpopulated cache is valid for no tenant.
        self.load_skipped = True
        # Extra-config-path entries (from any tenant) whose results
        # have already been folded into this cache.
        self.extra_files_searched = set()
        self.extra_dirs_searched = set()
        # path -> unparsed config data for that file.
        self.files = {}

    def isValidFor(self, tpc):
        """Return True if this has valid cache results for the extra
        files/dirs in the tpc.
        """
        if self.load_skipped:
            return False
        return (self.extra_files_searched >= set(tpc.extra_config_files)
                and self.extra_dirs_searched >= set(tpc.extra_config_dirs))

    def setValidFor(self, tpc):
        """Record that the paths requested by this tpc have been searched."""
        self.load_skipped = False
        self.extra_files_searched.update(tpc.extra_config_files)
        self.extra_dirs_searched.update(tpc.extra_config_dirs)

    def put(self, path, config):
        """Store the unparsed config for a single file."""
        self.files[path] = config

    def get(self, tpc):
        """Assemble an UnparsedConfig from the files visible to this tpc.

        Files are combined in Zuul's load order: zuul.yaml, zuul.d/,
        .zuul.yaml, .zuul.d/, then the tenant's extra files and dirs.
        """
        result = UnparsedConfig()
        in_zuul_d = []
        in_dot_zuul_d = []
        extra_files = []
        extra_dirs = []
        for path in self.files:
            if path.startswith("zuul.d/"):
                in_zuul_d.append(path)
            if path.startswith(".zuul.d/"):
                in_dot_zuul_d.append(path)
            # Note: a path matching several prefixes is listed once
            # per match.
            extra_files.extend(
                path for prefix in tpc.extra_config_files
                if path.startswith(prefix))
            extra_dirs.extend(
                path for prefix in tpc.extra_config_dirs
                if path.startswith(prefix))
        load_order = (["zuul.yaml"] + sorted(in_zuul_d) +
                      [".zuul.yaml"] + sorted(in_dot_zuul_d) +
                      extra_files + sorted(extra_dirs))
        for path in load_order:
            data = self.files.get(path)
            if data is not None:
                result.extend(data)
        return result
|
||||
|
||||
|
||||
class Abide(object):
|
||||
def __init__(self):
|
||||
self.tenants = OrderedDict()
|
||||
# project -> branch -> UnparsedConfig
|
||||
self.unparsed_project_branch_config = {}
|
||||
# project -> branch -> UnparsedBranchCache
|
||||
self.unparsed_project_branch_cache = {}
|
||||
|
||||
def cacheUnparsedConfig(self, canonical_project_name, branch, conf):
|
||||
self.unparsed_project_branch_config.setdefault(
|
||||
canonical_project_name, {})[branch] = conf
|
||||
def hasUnparsedBranchCache(self, canonical_project_name, branch):
|
||||
project_branch_cache = self.unparsed_project_branch_cache.setdefault(
|
||||
canonical_project_name, {})
|
||||
cache = project_branch_cache.get(branch)
|
||||
if cache is None:
|
||||
return False
|
||||
return True
|
||||
|
||||
def getUnparsedConfig(self, canonical_project_name, branch):
|
||||
return self.unparsed_project_branch_config.get(
|
||||
canonical_project_name, {}).get(branch)
|
||||
def getUnparsedBranchCache(self, canonical_project_name, branch):
|
||||
project_branch_cache = self.unparsed_project_branch_cache.setdefault(
|
||||
canonical_project_name, {})
|
||||
cache = project_branch_cache.get(branch)
|
||||
if cache is not None:
|
||||
return cache
|
||||
project_branch_cache[branch] = UnparsedBranchCache()
|
||||
return project_branch_cache[branch]
|
||||
|
||||
def clearUnparsedConfigCache(self, canonical_project_name, branch=None):
|
||||
if canonical_project_name in self.unparsed_project_branch_config:
|
||||
project_branch_config = \
|
||||
self.unparsed_project_branch_config[canonical_project_name]
|
||||
def clearUnparsedBranchCache(self, canonical_project_name, branch=None):
|
||||
if canonical_project_name in self.unparsed_project_branch_cache:
|
||||
project_branch_cache = \
|
||||
self.unparsed_project_branch_cache[canonical_project_name]
|
||||
|
||||
if branch in project_branch_config:
|
||||
del project_branch_config[branch]
|
||||
if branch in project_branch_cache:
|
||||
del project_branch_cache[branch]
|
||||
|
||||
if len(project_branch_config) == 0 or branch is None:
|
||||
del self.unparsed_project_branch_config[canonical_project_name]
|
||||
if len(project_branch_cache) == 0 or branch is None:
|
||||
del self.unparsed_project_branch_cache[canonical_project_name]
|
||||
|
||||
|
||||
class JobTimeData(object):
|
||||
|
@ -729,7 +729,7 @@ class Scheduler(threading.Thread):
|
||||
for (project, branch) in event.project_branches:
|
||||
self.log.debug("Clearing unparsed config: %s @%s",
|
||||
project.canonical_name, branch)
|
||||
self.abide.clearUnparsedConfigCache(project.canonical_name,
|
||||
self.abide.clearUnparsedBranchCache(project.canonical_name,
|
||||
branch)
|
||||
old_tenant = self.abide.tenants[event.tenant_name]
|
||||
loader = configloader.ConfigLoader(
|
||||
@ -1096,8 +1096,8 @@ class Scheduler(threading.Thread):
|
||||
change.updatesConfig(tenant)) or
|
||||
event.branch_created or
|
||||
(event.branch_deleted and
|
||||
self.abide.getUnparsedConfig(event.project_name,
|
||||
event.branch) is not None)):
|
||||
self.abide.hasUnparsedBranchCache(event.project_name,
|
||||
event.branch))):
|
||||
reconfigure_tenant = True
|
||||
|
||||
# If the driver knows the branch but we don't have a config, we
|
||||
@ -1105,8 +1105,8 @@ class Scheduler(threading.Thread):
|
||||
# was just configured as protected without a push in between.
|
||||
if (event.branch in project.source.getProjectBranches(
|
||||
project, tenant)
|
||||
and self.abide.getUnparsedConfig(
|
||||
project.canonical_name, event.branch) is None):
|
||||
and not self.abide.hasUnparsedBranchCache(
|
||||
project.canonical_name, event.branch)):
|
||||
reconfigure_tenant = True
|
||||
|
||||
# If the branch is unprotected and unprotected branches
|
||||
|
Loading…
x
Reference in New Issue
Block a user