Support optional post upload hooks

There are several scenarios where it can be useful hook into nodepool
after an image got uploaded but before it is taken into use by the
launchers. One use case is to be able to run validations on the image
(e.g. image size, boot test, etc.) before nodepool tries to use that
image and causing potentially node_failures. Another more advanced use
case is to be able to pre-distribute an image to all compute nodes in
a cloud before an image is used at scale.

To facilitate these use cases this adds a new config option
post-upload-hook to the provider config. This takes a path to a user
defined executable script which then can perform various tasks. If the
process fails with an rc != 0 the image gets deleted again and the
upload fails.

Change-Id: I099cf1243b1bd262b8ee96ab323dbd34c7578c10
changes/66/676266/7
Tobias Henkel 4 years ago
parent 915be0a5be
commit 0dc40d33e4
No known key found for this signature in database
GPG Key ID: 03750DEC158E5FA2

@ -446,6 +446,7 @@ Selecting the OpenStack driver adds the following options to the
launch-retries: 3
image-name-format: '{image_name}-{timestamp}'
hostname-format: '{label.name}-{provider.name}-{node.id}'
post-upload-hook: /usr/bin/custom-hook
diskimages:
- name: trusty
meta:
@ -567,6 +568,20 @@ Selecting the OpenStack driver adds the following options to the
Format for image names that are uploaded to providers.
.. attr:: post-upload-hook
:type: string
:default: None
Filename of an optional script that can be called after an image has
been uploaded to a provider but before it is taken into use. This is
useful to perform last minute validation tests before an image is
really used for build nodes. The script will be called as follows:
``<SCRIPT> <PROVIDER> <EXTERNAL_IMAGE_ID> <LOCAL_IMAGE_FILENAME>``
If the script returns with result code 0 it is treated as successful
otherwise it is treated as failed and the image gets deleted.
.. attr:: rate
:type: int seconds
:default: 1

@ -1082,6 +1082,42 @@ class UploadWorker(BaseWorker):
data.state = zk.FAILED
return data
if provider.post_upload_hook:
try:
cmd = [
provider.post_upload_hook,
provider.name,
external_id,
filename
]
self.log.info('Running post upload hook %s', cmd)
p = subprocess.run(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, check=True)
except Exception as e:
if isinstance(e, subprocess.CalledProcessError):
self.log.error('Post upload hook failed with '
'exit code %s\nstdout:\n%s\nstderr:\n%s',
e.returncode, e.stdout.decode(),
e.stderr.decode())
else:
self.log.exception('Unknown exception during upload hook')
try:
manager.deleteImage(ext_image_name)
except Exception:
# Image delete failed but we cannot do anything about this
# right now so just log the exception.
self.log.exception('Unable to delete image "%s"',
ext_image_name)
data = zk.ImageUpload()
data.state = zk.FAILED
return data
self.log.info(
'Post upload hook success with exit code %s\n'
'stdout:\n%s\nstderr:\n%s',
p.returncode, p.stdout.decode(), p.stderr.decode())
if self._statsd:
dt = int((time.time() - start_time) * 1000)
key = 'nodepool.image_update.%s.%s' % (image_name,

@ -235,6 +235,7 @@ class OpenStackProviderConfig(ProviderConfig):
self.cloud_images = {}
self.hostname_format = None
self.image_name_format = None
self.post_upload_hook = None
super().__init__(provider)
def __eq__(self, other):
@ -250,7 +251,8 @@ class OpenStackProviderConfig(ProviderConfig):
other.port_cleanup_interval ==
self.port_cleanup_interval and
other.diskimages == self.diskimages and
other.cloud_images == self.cloud_images)
other.cloud_images == self.cloud_images and
other.post_upload_hook == self.post_upload_hook)
return False
def _cloudKwargs(self):
@ -292,6 +294,7 @@ class OpenStackProviderConfig(ProviderConfig):
'image-name-format',
'{image_name}-{timestamp}'
)
self.post_upload_hook = self.provider.get('post-upload-hook')
default_port_mapping = {
'ssh': 22,
@ -425,6 +428,7 @@ class OpenStackProviderConfig(ProviderConfig):
'pools': [pool],
'diskimages': [provider_diskimage],
'cloud-images': [provider_cloud_images],
'post-upload-hook': str,
})
return v.Schema(schema)

@ -77,6 +77,7 @@ providers:
boot-timeout: 120
rate: 0.001
port-cleanup-interval: 0
post-upload-hook: /usr/bin/upload-hook
diskimages:
- name: trusty
pause: False

@ -0,0 +1,45 @@
elements-dir: .
images-dir: '{images_dir}'
build-log-dir: '{build_log_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
post-upload-hook: nodepool/tests/post-upload-hook
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
availability-zones:
- az1
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
dib-cmd: nodepool/tests/fake-image-create
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

@ -0,0 +1,20 @@
#!/bin/bash
set -eu
echo "*** post-upload-hook: start"
echo "arguments:"
echo "----"
echo $*
echo "----"
PROVIDER=$1
IMAGE_ID=$2
LOCAL_IMAGE_FILENAME=$3
STATEFILE="$(dirname $LOCAL_IMAGE_FILENAME)/$(basename $LOCAL_IMAGE_FILENAME).post"
# Tests might need to know when this process is completed
echo "Creating state file $STATEFILE"
touch $STATEFILE

@ -447,3 +447,13 @@ class TestNodePoolBuilder(tests.DBTestCase):
cleanup_mgr.deleteImage = saved_method
self.waitForUploadRecordDeletion(image.provider_name, image.image_name,
image.build_id, image.id)
def test_post_upload_hook(self):
configfile = self.setup_config('node_upload_hook.yaml')
bldr = self.useBuilder(configfile)
self.waitForImage('fake-provider', 'fake-image')
images_dir = bldr._config.imagesdir
post_file = os.path.join(
images_dir, 'fake-image-0000000001.qcow2.post')
self.assertTrue(os.path.exists(post_file), 'Post hook file exists')

Loading…
Cancel
Save