Combine full and smart reconfiguration events

Full and smart reconfigurations only differ in a few areas, mainly in which
tenants are reconfigured and in whether the connection and unparsed
branch caches are cleared.

Clearing of connection caches doesn't seem to be needed in practice and
will be removed, as it can't be easily coordinated between multiple
schedulers. However, since some tests seem to depend on that behavior,
the removal will be done in a separate change.

We will also no longer create a new Abide instance for full
reconfigurations as this would also require additional coordination
between multiple schedulers if we wanted to replicate that.

Change-Id: I746d81547ad8e0e0f669981d0fcce65f9fd2e32b
This commit is contained in:
Simon Westphahl 2021-07-05 11:38:39 +02:00
parent db0bf681d5
commit 9f69ecaebe
5 changed files with 44 additions and 76 deletions

View File

@ -8836,6 +8836,7 @@ class TestSchedulerSmartReconfiguration(ZuulTestCase):
# The tenant-three has been removed so nothing should be reported
self.assertEqual(0, C.reported)
self.assertNotIn('tenant-three', self.scheds.first.sched.abide.tenants)
# Verify known tenants
expected_tenants = {'tenant-one', 'tenant-two', 'tenant-four'}

View File

@ -2266,6 +2266,7 @@ class ConfigLoader(object):
return unparsed_abide
def loadAdminRules(self, abide, unparsed_abide):
abide.admin_rules.clear()
for conf_admin_rule in unparsed_abide.admin_rules:
admin_rule = self.admin_rule_parser.fromYaml(conf_admin_rule)
abide.admin_rules[admin_rule.name] = admin_rule

View File

@ -3760,10 +3760,20 @@ class ReconfigureEvent(ManagementEvent):
"""Reconfigure the scheduler. The layout will be (re-)loaded from
the path specified in the configuration."""
def __init__(self, smart=False):
super(ReconfigureEvent, self).__init__()
self.smart = smart
class SmartReconfigureEvent(ManagementEvent):
"""Reconfigure the scheduler. The layout will be (re-)loaded from
the path specified in the configuration."""
def toDict(self):
d = super().toDict()
d["smart"] = self.smart
return d
@classmethod
def fromDict(cls, data):
event = cls(data.get("smart", False))
event.updateFromDict(data)
return event
class TenantReconfigureEvent(ManagementEvent):

View File

@ -62,7 +62,6 @@ from zuul.model import (
NodesProvisionedEvent,
PromoteEvent,
ReconfigureEvent,
SmartReconfigureEvent,
TenantReconfigureEvent,
TimeDataBase,
UnparsedAbideConfig,
@ -628,11 +627,8 @@ class Scheduler(threading.Thread):
def reconfigure(self, config, smart=False):
self.log.debug("Submitting reconfiguration event")
if smart:
event = SmartReconfigureEvent()
else:
event = ReconfigureEvent()
event = ReconfigureEvent(smart=smart)
event.ack_ref = threading.Event()
self.reconfigure_event_queue.put(event)
self.wake_event.set()
@ -869,64 +865,12 @@ class Scheduler(threading.Thread):
duration)
def _doReconfigureEvent(self, event):
# This is called in the scheduler loop after another thread submits
# a request
self.layout_lock.acquire()
self.config = self._zuul_app.config
try:
self.log.info("Full reconfiguration beginning")
start = time.monotonic()
# Reload the ansible manager in case the default ansible version
# changed.
default_ansible_version = get_default(
self.config, 'scheduler', 'default_ansible_version', None)
self.ansible_manager = AnsibleManager(
default_version=default_ansible_version)
for connection in self.connections.connections.values():
self.log.debug("Clear cache for: %s" % connection)
connection.clearCache()
loader = configloader.ConfigLoader(
self.connections, self, self.merger, self.keystore)
tenant_config, script = self._checkTenantSourceConf(self.config)
self.unparsed_abide = loader.readConfig(
tenant_config, from_script=script)
abide = Abide()
loader.loadAdminRules(abide, self.unparsed_abide)
loader.loadTPCs(abide, self.unparsed_abide)
for tenant_name in self.unparsed_abide.tenants:
tenant = loader.loadTenant(abide, tenant_name,
self.ansible_manager,
self.unparsed_abide,
cache_ltime=None)
if tenant is not None:
old_tenant = self.abide.tenants.get(tenant_name)
self._reconfigureTenant(tenant, old_tenant)
for old_tenant in self.abide.tenants.values():
if old_tenant.name not in abide.tenants:
# We deleted a tenant
self._reconfigureDeleteTenant(old_tenant)
self.abide = abide
finally:
self.layout_lock.release()
duration = round(time.monotonic() - start, 3)
self.log.info("Full reconfiguration complete (duration: %s seconds)",
duration)
def _doSmartReconfigureEvent(self, event):
# This is called in the scheduler loop after another thread submits
# a request
reconfigured_tenants = []
with self.layout_lock:
self.config = self._zuul_app.config
self.log.info("Smart reconfiguration beginning")
self.log.info("Reconfiguration beginning (smart=%s)", event.smart)
start = time.monotonic()
# Reload the ansible manager in case the default ansible version
@ -936,6 +880,11 @@ class Scheduler(threading.Thread):
self.ansible_manager = AnsibleManager(
default_version=default_ansible_version)
if not event.smart:
for connection in self.connections.connections.values():
self.log.debug("Clear cache for: %s" % connection)
connection.clearCache()
loader = configloader.ConfigLoader(
self.connections, self, self.merger, self.keystore)
tenant_config, script = self._checkTenantSourceConf(self.config)
@ -943,8 +892,8 @@ class Scheduler(threading.Thread):
self.unparsed_abide = loader.readConfig(
tenant_config, from_script=script)
# We need to handle new and deleted tenants so we need to process
# all tenants from the currently known and the new ones.
# We need to handle new and deleted tenants, so we need to process
# all tenants currently known and the new ones.
tenant_names = {t for t in self.abide.tenants}
tenant_names.update(self.unparsed_abide.tenants.keys())
@ -953,29 +902,39 @@ class Scheduler(threading.Thread):
self.unparsed_abide.tenants.keys())
for tenant_name in deleted_tenants:
self.abide.clearTPCs(tenant_name)
loader.loadTPCs(self.abide, self.unparsed_abide)
loader.loadAdminRules(self.abide, self.unparsed_abide)
cache_ltime = event.zuul_event_ltime
if not event.smart:
self.abide.unparsed_project_branch_cache.clear()
# Force a reload of the config via the mergers
cache_ltime = None
for tenant_name in tenant_names:
old_tenant = old_unparsed_abide.tenants.get(tenant_name)
new_tenant = self.unparsed_abide.tenants.get(tenant_name)
if old_tenant == new_tenant:
continue
if event.smart:
old_tenant = old_unparsed_abide.tenants.get(tenant_name)
new_tenant = self.unparsed_abide.tenants.get(tenant_name)
if old_tenant == new_tenant:
continue
reconfigured_tenants.append(tenant_name)
old_tenant = self.abide.tenants.get(tenant_name)
tenant = loader.loadTenant(self.abide, tenant_name,
self.ansible_manager,
self.unparsed_abide,
cache_ltime=event.zuul_event_ltime)
tenant = self.abide.tenants.get(tenant_name)
cache_ltime=cache_ltime)
reconfigured_tenants.append(tenant_name)
if tenant is not None:
self._reconfigureTenant(tenant, old_tenant)
else:
self._reconfigureDeleteTenant(old_tenant)
duration = round(time.monotonic() - start, 3)
self.log.info("Smart reconfiguration of tenants %s complete "
"(duration: %s seconds)", reconfigured_tenants, duration)
self.log.info("Reconfiguration complete (smart: %s, "
"duration: %s seconds)", event.smart, duration)
if event.smart:
self.log.info("Reconfigured tenants: %s", reconfigured_tenants)
def _doTenantReconfigureEvent(self, event):
# This is called in the scheduler loop after another thread submits
@ -1579,8 +1538,6 @@ class Scheduler(threading.Thread):
try:
if isinstance(event, ReconfigureEvent):
self._doReconfigureEvent(event)
elif isinstance(event, SmartReconfigureEvent):
self._doSmartReconfigureEvent(event)
else:
self.log.error("Unable to handle event %s", event)
finally:

View File

@ -46,7 +46,6 @@ MANAGEMENT_EVENT_TYPE_MAP = {
"EnqueueEvent": model.EnqueueEvent,
"PromoteEvent": model.PromoteEvent,
"ReconfigureEvent": model.ReconfigureEvent,
"SmartReconfigureEvent": model.SmartReconfigureEvent,
"TenantReconfigureEvent": model.TenantReconfigureEvent,
}