Refactor config loading in builder and launcher

In I93400cc156d09ea1add4fc753846df923242c0e6 we've refactore the
launcher config loading to use the last modified timestamps of the
config files to detect if a reload is necessary.

In the builder the situation is even worse as we reload and compare the
config much more often e.g. in the build worker when checking for manual
or scheduled image updates.

With a larger config (2-3MB range) this is a significant performance
problem that can lead to builders being busy with config loading instead
of building images.

Yappi profile (performed with the optimization proposed in
I786daa20ca428039a44d14b1e389d4d3fd62a735, which doesn't fully solve the
problem):

name                                  ncall  tsub      ttot      tavg
..py:880 AwsProviderDiskImage.__eq__  812..  17346.57  27435.41  0.000034
..odepool/config.py:281 Label.__eq__  155..  1.189220  27403.11  0.176285
..643 BuildWorker._checkConfigRecent  58     0.000000  27031.40  466.0586
..depool/config.py:118 Config.__eq__  58     0.000000  26733.50  460.9225

Change-Id: I929bdb757eb9e077012b530f6f872bea96ec8bbc
This commit is contained in:
Simon Westphahl 2024-01-30 13:59:36 +01:00
parent 42f9100d82
commit 4ae0a6f9a6
No known key found for this signature in database
3 changed files with 52 additions and 38 deletions

View File

@ -132,6 +132,10 @@ class BaseWorker(threading.Thread):
self.log.debug("Detected ZooKeeper server changes")
self._zk.resetHosts(list(new_config.zookeeper_servers.values()))
def _checkConfigRecent(self):
return self._config and nodepool_config.checkRecentConfig(
self._config, self._config_path, self._secure_path)
def _readConfig(self):
new_config = nodepool_config.loadConfig(self._config_path)
if self._secure_path:
@ -585,16 +589,15 @@ class CleanupWorker(BaseWorker):
'''
Body of run method for exception handling purposes.
'''
new_config = self._readConfig()
if not self._config:
if not self._checkConfigRecent():
new_config = self._readConfig()
if not self._config:
self._config = new_config
self._checkForZooKeeperChanges(new_config)
provider_manager.ProviderManager.reconfigure(
self._config, new_config, self._zk, only_image_manager=True)
self._config = new_config
self._checkForZooKeeperChanges(new_config)
provider_manager.ProviderManager.reconfigure(self._config, new_config,
self._zk,
only_image_manager=True)
self._config = new_config
self._cleanup()
self._emitBuildRequestStats()
@ -648,8 +651,7 @@ class BuildWorker(BaseWorker):
if not self._running or self._zk.suspended or self._zk.lost:
return
try:
new_config = self._readConfig()
if new_config != self._config:
if not self._checkConfigRecent():
# If our config isn't up to date then return and start
# over with a new config load.
return
@ -761,8 +763,7 @@ class BuildWorker(BaseWorker):
if not self._running or self._zk.suspended or self._zk.lost:
return
try:
new_config = self._readConfig()
if new_config != self._config:
if not self._checkConfigRecent():
# If our config isn't up to date then return and start
# over with a new config load.
return
@ -1036,13 +1037,13 @@ class BuildWorker(BaseWorker):
'''
Body of run method for exception handling purposes.
'''
# NOTE: For the first iteration, we expect self._config to be None
new_config = self._readConfig()
if not self._config:
if not self._checkConfigRecent():
new_config = self._readConfig()
if not self._config:
self._config = new_config
self._checkForZooKeeperChanges(new_config)
self._config = new_config
self._checkForZooKeeperChanges(new_config)
self._config = new_config
self._checkForScheduledImageUpdates()
self._checkForManualBuildRequest()
@ -1059,16 +1060,15 @@ class UploadWorker(BaseWorker):
'''
Reload the nodepool configuration file.
'''
new_config = self._readConfig()
if not self._config:
if not self._checkConfigRecent():
new_config = self._readConfig()
if not self._config:
self._config = new_config
self._checkForZooKeeperChanges(new_config)
provider_manager.ProviderManager.reconfigure(
self._config, new_config, self._zk, only_image_manager=True)
self._config = new_config
self._checkForZooKeeperChanges(new_config)
provider_manager.ProviderManager.reconfigure(self._config, new_config,
self._zk,
only_image_manager=True)
self._config = new_config
def _uploadImage(self, build_id, upload_id, image_name, images, provider,
username, python_path, shell_type):
'''

View File

@ -114,6 +114,8 @@ class Config(ConfigValue):
self.max_hold_age = None
self.webapp = None
self.tenant_resource_limits = {}
# Last modified timestamps of loaded config files
self.config_mtimes = {}
def __eq__(self, other):
if isinstance(other, Config):
@ -133,6 +135,9 @@ class Config(ConfigValue):
)
return False
def setConfigPathMtime(self, path, mtime):
self.config_mtimes[path] = mtime
def setElementsDir(self, value):
self.elements_dir = value
@ -428,6 +433,7 @@ def openConfig(path, env):
def loadConfig(config_path, env=os.environ):
config_mtime = os.stat(config_path).st_mtime_ns
config = openConfig(config_path, env)
# Call driver config reset now to clean global hooks like openstacksdk
@ -449,11 +455,13 @@ def loadConfig(config_path, env=os.environ):
newconfig.setProviders(config.get('providers'))
newconfig.setZooKeeperTLS(config.get('zookeeper-tls'))
newconfig.setTenantResourceLimits(config.get('tenant-resource-limits'))
newconfig.setConfigPathMtime(config_path, config_mtime)
return newconfig
def loadSecureConfig(config, secure_config_path, env=os.environ):
secure_mtime = os.stat(secure_config_path).st_mtime_ns
secure = openConfig(secure_config_path, env)
if not secure: # empty file
return
@ -466,3 +474,19 @@ def loadSecureConfig(config, secure_config_path, env=os.environ):
config.setSecureDiskimageEnv(
secure.get('diskimages', []), secure_config_path)
config.setZooKeeperTLS(secure.get('zookeeper-tls'))
config.setConfigPathMtime(secure_config_path, secure_mtime)
def checkRecentConfig(config, config_path, secure_path=None):
current_config_mtime = config.config_mtimes.get(config_path, 0)
new_config_mtime = os.stat(config_path).st_mtime_ns
if current_config_mtime != new_config_mtime:
return False
if secure_path:
current_secure_mtime = config.config_mtimes.get(secure_path, 0)
new_secure_mtime = os.stat(secure_path).st_mtime_ns
if current_secure_mtime != new_secure_mtime:
return False
return True

View File

@ -1034,9 +1034,7 @@ class NodePool(threading.Thread):
watermark_sleep=WATERMARK_SLEEP):
threading.Thread.__init__(self, name='NodePool')
self.securefile = securefile
self._secure_mtime = 0
self.configfile = configfile
self._config_mtime = 0
self.watermark_sleep = watermark_sleep
self.cleanup_interval = 60
self.delete_interval = 5
@ -1145,15 +1143,9 @@ class NodePool(threading.Thread):
t.provider_name == provider_name]
def updateConfig(self):
secure_mtime = 0
config_mtime = os.stat(self.configfile).st_mtime_ns
if self._config_mtime == config_mtime:
if self.securefile:
secure_mtime = os.stat(self.securefile).st_mtime_ns
if self._secure_mtime == secure_mtime:
return
else:
return
if self.config and nodepool_config.checkRecentConfig(
self.config, self.configfile, self.securefile):
return
config = self.loadConfig()
self.reconfigureZooKeeper(config)
@ -1164,8 +1156,6 @@ class NodePool(threading.Thread):
del config.providers[provider_name]
self.setConfig(config)
self._config_mtime = config_mtime
self._secure_mtime = secure_mtime
def removeCompletedRequests(self):
'''