Use image cache when launching nodes

We consult ZooKeeper to determine the most recent image upload
when we decide whether we should accept or decline a request.  If
we accept the request, we also consult it again for the same
information when we start building the node.  In both cases, we
can use the cache to avoid what may be quite a lot of ZK requests
(especially with a large number of images or uploads).  Our cache
should be almost up to date (typically
milliseconds, or at the worst, seconds behind), and the worst
case is equivalent to what would happen if an image build took
just a few seconds longer.  The tradeoff is worth it.

Similarly, when we create min-ready requests, we can also consult
the cache.

With these three changes, every call to getMostRecentImageUpload
in Nodepool (outside the unit tests) uses the cache.

The original un-cached behavior is kept as the default
(cached=False), because there are an enormous number of calls to
this method in the unit tests and they don't have caching enabled.

Change-Id: Iac1ff8adfbdb8eb9a286929a59cf07cd0b4ac7ad
This commit is contained in:
James E. Blair 2023-03-14 15:28:39 -07:00
parent 70f143690d
commit c767db9391
3 changed files with 36 additions and 22 deletions

View File

@ -99,7 +99,7 @@ class StateMachineNodeLauncher(stats.StatsReporter):
diskimage = self.provider_config.diskimages[
label.diskimage.name]
cloud_image = self.zk.getMostRecentImageUpload(
diskimage.name, self.provider_config.name)
diskimage.name, self.provider_config.name, cached=True)
if not cloud_image:
raise exceptions.LaunchNodepoolException(
@ -374,7 +374,7 @@ class StateMachineHandler(NodeRequestHandler):
else:
if not self.zk.getMostRecentImageUpload(
self.pool.labels[label].diskimage.name,
self.provider.name):
self.provider.name, cached=True):
return False
return True

View File

@ -1176,7 +1176,8 @@ class NodePool(threading.Thread):
for pool_label in pool.labels.values():
if pool_label.diskimage:
if self.zk.getMostRecentImageUpload(
pool_label.diskimage.name, pool.provider.name):
pool_label.diskimage.name, pool.provider.name,
cached=True):
return True
else:
manager = self.getProviderManager(pool.provider.name)

View File

@ -1388,39 +1388,52 @@ class ZooKeeper(ZooKeeperBase):
return uploads[:count]
def getMostRecentImageUpload(self, image, provider,
state=READY):
state=READY, cached=False):
'''
Retrieve the most recent image upload data with the given state.
:param str image: The image name.
:param str provider: The provider name owning the image.
:param str state: The image upload state to match on.
:param bool cached: Whether to use cached data.
:returns: An ImageUpload object matching the given state, or
None if there is no recent upload.
'''
uploads = []
recent_data = None
for build_number in self.getBuildNumbers(image):
path = self._imageUploadPath(image, build_number, provider)
if cached:
uploads = self.getCachedImageUploads()
else:
for build_number in self.getBuildNumbers(image):
path = self._imageUploadPath(image, build_number, provider)
try:
upload_numbers = self.kazoo_client.get_children(path)
except kze.NoNodeError:
upload_numbers = []
try:
uploads = self.kazoo_client.get_children(path)
except kze.NoNodeError:
uploads = []
for upload_number in upload_numbers:
if upload_number == 'lock': # skip the upload lock node
continue
data = self.getImageUpload(
image, build_number, provider, upload_number)
if not data or data.state != state:
continue
uploads.append(data)
for upload in uploads:
if upload == 'lock': # skip the upload lock node
continue
data = self.getImageUpload(
image, build_number, provider, upload)
if not data or data.state != state:
continue
elif (recent_data is None or
recent_data.state_time < data.state_time):
recent_data = data
recent_upload = None
for upload in uploads:
if upload.image_name != image:
continue
if upload.provider_name != provider:
continue
if upload.state != state:
continue
if (recent_upload is None or
recent_upload.state_time < upload.state_time):
recent_upload = upload
return recent_data
return recent_upload
def storeImageUpload(self, image, build_number, provider, image_data,
upload_number=None):