Log tenant read/write lock actions
These additional debug log lines can help an operator or developer understand what the scheduler is doing or waiting on during complex reconfiguration operations (for example, to learn that one scheduler is waiting on another scheduler to finish reconfiguring a tenant).

Change-Id: I99a756814ed9033b364d68a214ca906034108f16
This commit is contained in:
parent
5933704a6a
commit
2867ed246b
@ -50,7 +50,7 @@ class TestConfigLoader(ZuulTestCase):
|
||||
loader.loadAuthzRules(abide, unparsed_abide)
|
||||
|
||||
for tenant_name in unparsed_abide.tenants:
|
||||
tlock = tenant_read_lock(self.zk_client, tenant_name)
|
||||
tlock = tenant_read_lock(self.zk_client, tenant_name, self.log)
|
||||
# Consider all caches valid (min. ltime -1)
|
||||
min_ltimes = defaultdict(lambda: defaultdict(lambda: -1))
|
||||
with tlock:
|
||||
|
@ -1047,7 +1047,7 @@ class Client(zuul.cmd.ZuulApp):
|
||||
safe_tenant = urllib.parse.quote_plus(args.tenant)
|
||||
safe_pipeline = urllib.parse.quote_plus(args.pipeline)
|
||||
COMPONENT_REGISTRY.create(zk_client)
|
||||
with tenant_read_lock(zk_client, args.tenant):
|
||||
with tenant_read_lock(zk_client, args.tenant, self.log):
|
||||
path = f'/zuul/tenant/{safe_tenant}/pipeline/{safe_pipeline}'
|
||||
pipeline = Pipeline(args.tenant, args.pipeline)
|
||||
with pipeline_lock(
|
||||
|
@ -761,7 +761,7 @@ class Scheduler(threading.Thread):
|
||||
for tenant in self.abide.tenants.values():
|
||||
try:
|
||||
with tenant_read_lock(self.zk_client, tenant.name,
|
||||
blocking=False):
|
||||
self.log, blocking=False):
|
||||
if not self.isTenantLayoutUpToDate(tenant.name):
|
||||
self.log.debug(
|
||||
"Skipping leaked pipeline cleanup for tenant %s",
|
||||
@ -1028,11 +1028,13 @@ class Scheduler(threading.Thread):
|
||||
# There is no need to use the reconfig lock ID here as
|
||||
# we are starting from an empty layout state and there
|
||||
# should be no concurrent read locks.
|
||||
lock_ctx = tenant_write_lock(self.zk_client, tenant_name)
|
||||
lock_ctx = tenant_write_lock(self.zk_client,
|
||||
tenant_name, self.log)
|
||||
timer_ctx = self.statsd_timer(
|
||||
f'{stats_key}.reconfiguration_time')
|
||||
else:
|
||||
lock_ctx = tenant_read_lock(self.zk_client, tenant_name)
|
||||
lock_ctx = tenant_read_lock(self.zk_client,
|
||||
tenant_name, self.log)
|
||||
timer_ctx = nullcontext()
|
||||
|
||||
with lock_ctx as tlock, timer_ctx:
|
||||
@ -1278,7 +1280,7 @@ class Scheduler(threading.Thread):
|
||||
self.updateSystemConfig()
|
||||
|
||||
with tenant_read_lock(self.zk_client, tenant_name,
|
||||
blocking=False):
|
||||
log, blocking=False):
|
||||
remote_state = self.tenant_layout_state.get(
|
||||
tenant_name)
|
||||
local_state = self.local_layout_state.get(
|
||||
@ -1535,7 +1537,7 @@ class Scheduler(threading.Thread):
|
||||
|
||||
stats_key = f'zuul.tenant.{tenant_name}'
|
||||
with (tenant_write_lock(
|
||||
self.zk_client, tenant_name,
|
||||
self.zk_client, tenant_name, self.log,
|
||||
identifier=RECONFIG_LOCK_ID) as lock,
|
||||
self.statsd_timer(f'{stats_key}.reconfiguration_time')):
|
||||
tenant = loader.loadTenant(
|
||||
@ -1601,7 +1603,7 @@ class Scheduler(threading.Thread):
|
||||
with self.layout_lock[event.tenant_name]:
|
||||
old_tenant = self.abide.tenants.get(event.tenant_name)
|
||||
with (tenant_write_lock(
|
||||
self.zk_client, event.tenant_name,
|
||||
self.zk_client, event.tenant_name, self.log,
|
||||
identifier=RECONFIG_LOCK_ID) as lock,
|
||||
self.statsd_timer(f'{stats_key}.reconfiguration_time')):
|
||||
log.debug("Loading tenant %s", event.tenant_name)
|
||||
@ -2174,7 +2176,7 @@ class Scheduler(threading.Thread):
|
||||
|
||||
try:
|
||||
with tenant_read_lock(
|
||||
self.zk_client, tenant_name, blocking=False
|
||||
self.zk_client, tenant_name, self.log, blocking=False
|
||||
) as tlock:
|
||||
if not self.isTenantLayoutUpToDate(tenant_name):
|
||||
continue
|
||||
|
@ -2760,7 +2760,7 @@ class ZuulWeb(object):
|
||||
== self.tenant_layout_state.get(tenant_name)):
|
||||
return
|
||||
self.log.debug("Reloading tenant %s", tenant_name)
|
||||
with tenant_read_lock(self.zk_client, tenant_name):
|
||||
with tenant_read_lock(self.zk_client, tenant_name, self.log):
|
||||
layout_state = self.tenant_layout_state.get(tenant_name)
|
||||
layout_uuid = layout_state and layout_state.uuid
|
||||
|
||||
|
@ -88,20 +88,32 @@ def locked(lock, blocking=True, timeout=None):
|
||||
|
||||
|
||||
@contextmanager
def tenant_read_lock(client, tenant_name, log=None, blocking=True):
    """Context manager holding the shared (read) lock for a tenant.

    Multiple readers may hold this lock concurrently; it conflicts with
    the tenant write lock taken during reconfiguration.

    :param client: ZooKeeper client wrapper; its ``.client`` attribute is
        the underlying kazoo client handed to the lock recipe.
    :param tenant_name: Name of the tenant to lock (quoted for use in the
        ZooKeeper lock path).
    :param log: Optional logger.  When supplied, waiting for, acquiring,
        and releasing the lock are logged at debug level so an operator
        can see what a scheduler is blocked on during reconfiguration.
    :param blocking: Whether to block until the lock is available; passed
        through to :func:`locked`.

    Yields the acquired lock object.
    """
    safe_tenant = quote_plus(tenant_name)
    # Only announce the wait when we might actually block on another
    # holder; a non-blocking attempt either succeeds or fails immediately.
    if blocking and log:
        log.debug("Wait for %s read tenant lock", tenant_name)
    with locked(
        SessionAwareReadLock(
            client.client,
            f"{TENANT_LOCK_ROOT}/{safe_tenant}"),
        blocking=blocking
    ) as lock:
        try:
            if log:
                # Fixed spelling: was "Aquired".
                log.debug("Acquired %s read tenant lock", tenant_name)
            yield lock
        finally:
            # Log release even if the body raised, so lock hold times
            # remain traceable in the debug log.
            if log:
                log.debug("Released %s read tenant lock", tenant_name)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def tenant_write_lock(client, tenant_name, blocking=True, identifier=None):
|
||||
def tenant_write_lock(client, tenant_name, log=None, blocking=True,
|
||||
identifier=None):
|
||||
safe_tenant = quote_plus(tenant_name)
|
||||
if blocking and log:
|
||||
log.debug("Wait for %s write tenant lock (id: %s)",
|
||||
tenant_name, identifier)
|
||||
with locked(
|
||||
SessionAwareWriteLock(
|
||||
client.client,
|
||||
@ -109,7 +121,15 @@ def tenant_write_lock(client, tenant_name, blocking=True, identifier=None):
|
||||
identifier=identifier),
|
||||
blocking=blocking,
|
||||
) as lock:
|
||||
yield lock
|
||||
try:
|
||||
if log:
|
||||
log.debug("Aquired %s write tenant lock (id: %s)",
|
||||
tenant_name, identifier)
|
||||
yield lock
|
||||
finally:
|
||||
if log:
|
||||
log.debug("Released %s write tenant lock (id: %s)",
|
||||
tenant_name, identifier)
|
||||
|
||||
|
||||
@contextmanager
|
||||
|
Loading…
Reference in New Issue
Block a user