Don't fail config loading on missing min. ltime

If the list of branches for a project includes branches that are not
(yet) present in the min. ltimes of a layout state, we can end up in a
situation where a scheduler is unable to start:

2022-05-16 17:34:50,895 ERROR zuul.Scheduler: Error starting Zuul:
Traceback (most recent call last):
  File "/opt/zuul/lib/python3.8/site-packages/zuul/cmd/scheduler.py", line 98, in run
    self.sched.prime(self.config)
  File "/opt/zuul/lib/python3.8/site-packages/zuul/scheduler.py", line 931, in prime
    tenant = loader.loadTenant(
  File "/opt/zuul/lib/python3.8/site-packages/zuul/configloader.py", line 2530, in loadTenant
    new_tenant = self.tenant_parser.fromYaml(
  File "/opt/zuul/lib/python3.8/site-packages/zuul/configloader.py", line 1631, in fromYaml
    self._cacheTenantYAML(abide, tenant, loading_errors, min_ltimes,
  File "/opt/zuul/lib/python3.8/site-packages/zuul/configloader.py", line 1896, in _cacheTenantYAML
    pb_ltime = min_ltimes[project.canonical_name][branch]
KeyError: 'new_branch'

The difference could be due to a missed branch creation event or a
simple race condition. The latter case might fix itself after the
reconfig triggered by the branch creation event has been processed.

Change-Id: I1838e66bc5296f153aa4c7a83ac0addb6c4db1aa
This commit is contained in:
Simon Westphahl 2022-05-17 14:30:18 +02:00
parent f2297cadb0
commit 558202dad6
2 changed files with 31 additions and 1 deletions

View File

@ -211,6 +211,31 @@ class TestTenantSimple(TenantParserTestCase):
r'review.example.com/org/project2 @master.*',
update_logs.output)
def test_cache_new_branch(self):
# Regression test: start a second scheduler after a new branch has been
# created while the first scheduler's layout-update and run-handler locks
# are held, then check that both schedulers reach the same layout state
# for "tenant-one".
# NOTE(review): indentation is lost in this view, so the exact extent of
# the `with` body below cannot be read from here -- confirm against the
# real file before editing.
first = self.scheds.first
lock1 = first.sched.layout_update_lock
lock2_ = first.sched.run_handler_lock
# Presumably holding both locks keeps the first scheduler from processing
# the branch-created event, so the second scheduler sees a branch with no
# min. ltime recorded yet -- TODO confirm.
with lock1, lock2_:
# Create the 'stable' branch on org/project1 and queue the matching
# branch-created event on the fake Gerrit connection.
self.create_branch('org/project1', 'stable')
self.fake_gerrit.addEvent(
self.fake_gerrit.getFakeBranchCreatedEvent(
'org/project1', 'stable'))
# Bring up a second scheduler; this is the startup path that previously
# raised KeyError for the new branch.
second = self.createScheduler()
second.start()
self.assertEqual(len(self.scheds), 2)
# Poll until the first scheduler reports a layout state for tenant-one
# (iterate_timeout presumably gives up after the given timeout -- verify).
for _ in iterate_timeout(10, "until priming is complete"):
state_one = first.sched.local_layout_state.get("tenant-one")
if state_one:
break
# Poll until the second scheduler's layout state for tenant-one equals
# the one captured from the first scheduler.
for _ in iterate_timeout(
10, "all schedulers to have the same layout state"):
if (second.sched.local_layout_state.get(
"tenant-one") == state_one):
break
# Wait for the test system to settle before the test ends.
self.waitUntilSettled()
def test_variant_description(self):
tenant = self.scheds.first.sched.abide.tenants.get('tenant-one')
job = tenant.layout.jobs.get("project2-job")

View File

@ -1907,7 +1907,12 @@ class TenantParser(object):
project.canonical_name, branch)
branch_cache = abide.getUnparsedBranchCache(
project.canonical_name, branch)
pb_ltime = min_ltimes[project.canonical_name][branch]
try:
pb_ltime = min_ltimes[project.canonical_name][branch]
except KeyError:
self.log.exception(
"Min. ltime missing for project/branch")
pb_ltime = -1
# If our unparsed branch cache is valid for the
# time, then we don't need to do anything else.