Browse Source

Merge "Revert "Add a timeout for the image build""

tags/3.5.0
Zuul 10 months ago
parent
commit
f2c155821c
10 changed files with 49 additions and 147 deletions
  1. +0
    -6
      doc/source/configuration.rst
  2. +17
    -28
      nodepool/builder.py
  3. +0
    -1
      nodepool/cmd/config_validator.py
  4. +1
    -4
      nodepool/config.py
  5. +7
    -12
      nodepool/tests/__init__.py
  6. +24
    -54
      nodepool/tests/fake-image-create
  7. +0
    -1
      nodepool/tests/fixtures/config_validate/good.yaml
  8. +0
    -26
      nodepool/tests/fixtures/diskimage_build_timeout.yaml
  9. +0
    -9
      nodepool/tests/unit/test_builder.py
  10. +0
    -6
      releasenotes/notes/build-timeout-bb68a1fd24f97a10.yaml

+ 0
- 6
doc/source/configuration.rst View File

@@ -277,12 +277,6 @@ Options
Specifies the distro to be used as a base image to build the image using
diskimage-builder.

.. attr:: build-timeout
:type: int

How long (in seconds) to wait for the diskimage build before giving up.
The default is 28800 seconds (8 hours).

.. attr:: elements
:type: list


+ 17
- 28
nodepool/builder.py View File

@@ -739,28 +739,21 @@ class BuildWorker(BaseWorker):
if 'qcow2' in img_types:
qemu_img_options = DEFAULT_QEMU_IMAGE_COMPAT_OPTIONS

log_fn = self._getBuildLog(diskimage.name, build_id)

cmd = ('%s -x -t %s --checksum --no-tmpfs %s -o %s --logfile %s %s' %
cmd = ('%s -x -t %s --checksum --no-tmpfs %s -o %s %s' %
(self.dib_cmd, img_types, qemu_img_options, filename,
log_fn, img_elements))
img_elements))

self._pruneBuildLogs(diskimage.name)
log_fn = self._getBuildLog(diskimage.name, build_id)

self.log.info('Running %s' % (cmd,))
self.log.info('Logging to %s' % (log_fn,))

start_time = time.monotonic()

# We used to use readline() on stdout to write the lines to the
# build log. Unfortunately, this would block for as long as the process
# ran (with no easy way to time out the read) and wedge the builder.
# Now we pass the --logfile option to the dib command and set a timeout
# on the wait() call to prevent the wedge.
did_timeout = False
try:
p = subprocess.Popen(
shlex.split(cmd),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
env=env)
except OSError as e:
@@ -768,20 +761,17 @@ class BuildWorker(BaseWorker):
"Failed to exec '%s'. Error: '%s'" % (cmd, e.strerror)
)

try:
rc = p.wait(timeout=diskimage.build_timeout)
except subprocess.TimeoutExpired:
p.kill()
did_timeout = True
rc = 1
self.log.error(
"Build timeout for image %s, build %s (log: %s)",
diskimage.name, build_id, log_fn)
else:
# Append return code to dib's log file
with open(log_fn, 'ab') as log:
m = "Exit code: %s\n" % rc
log.write(m.encode('utf8'))
with open(log_fn, 'wb') as log:
while True:
ln = p.stdout.readline()
log.write(ln)
log.flush()
if not ln:
break

rc = p.wait()
m = "Exit code: %s\n" % rc
log.write(m.encode('utf8'))

# It's possible the connection to the ZK cluster could have been
# interrupted during the build. If so, wait for it to return.
@@ -806,10 +796,9 @@ class BuildWorker(BaseWorker):
self.log.info("ZooKeeper lost while building %s" % diskimage.name)
self._zk.resetLostFlag()
build_data.state = zk.FAILED
elif p.returncode or did_timeout:
elif p.returncode:
self.log.info(
"DIB failed creating %s (%s) (timeout=%s)" % (
diskimage.name, p.returncode, did_timeout))
"DIB failed creating %s (%s)" % (diskimage.name, p.returncode))
build_data.state = zk.FAILED
else:
self.log.info("DIB image %s is built" % diskimage.name)

+ 0
- 1
nodepool/cmd/config_validator.py View File

@@ -44,7 +44,6 @@ class ConfigValidator:
'rebuild-age': int,
'env-vars': {str: str},
'username': str,
'build-timeout': int,
}

webapp = {

+ 1
- 4
nodepool/config.py View File

@@ -118,7 +118,6 @@ class Config(ConfigValue):
d.image_types = set(diskimage.get('formats', []))
d.pause = bool(diskimage.get('pause', False))
d.username = diskimage.get('username', 'zuul')
d.build_timeout = diskimage.get('build-timeout', (8 * 60 * 60))
self.diskimages[d.name] = d

def setSecureDiskimageEnv(self, diskimages, secure_config_path):
@@ -180,7 +179,6 @@ class DiskImage(ConfigValue):
self.image_types = None
self.pause = False
self.username = None
self.build_timeout = None

def __eq__(self, other):
if isinstance(other, DiskImage):
@@ -191,8 +189,7 @@ class DiskImage(ConfigValue):
other.env_vars == self.env_vars and
other.image_types == self.image_types and
other.pause == self.pause and
other.username == self.username and
other.build_timeout == self.build_timeout)
other.username == self.username)
return False

def __repr__(self):

+ 7
- 12
nodepool/tests/__init__.py View File

@@ -411,25 +411,20 @@ class DBTestCase(BaseTestCase):
time.sleep(1)
self.wait_for_threads()

def waitForBuild(self, image_name, build_id, states=None):
if states is None:
states = (zk.READY,)

def waitForBuild(self, image_name, build_id):
base = "-".join([image_name, build_id])

while True:
self.wait_for_threads()
build = self.zk.getBuild(image_name, build_id)
if build and build.state in states:
files = builder.DibImageFile.from_image_id(
self._config_images_dir.path, base)
if files:
break
time.sleep(1)

# We should only expect a dib manifest with a successful build.
while build.state == zk.READY:
while True:
self.wait_for_threads()
files = builder.DibImageFile.from_image_id(
self._config_images_dir.path, base)
if files:
build = self.zk.getBuild(image_name, build_id)
if build and build.state == zk.READY:
break
time.sleep(1)


+ 24
- 54
nodepool/tests/fake-image-create View File

@@ -1,49 +1,10 @@
#!/bin/bash

outfile=
outtypes=("qcow2")

all_args=$*
logfile=
checksum=
no_tmpfs=
qemu_img_options=
x=

TEMP=$(getopt -o xo:t: --long qemu-img-options:,no-tmpfs,checksum,logfile: -- "$@")
if [ $? -ne 0 ]; then
echo "Invalid option"
exit 1
fi
eval set -- "$TEMP"
while true ; do
case "$1" in
--checksum)
checksum=1; shift 1;;
--no-tmpfs)
no_tmpfs=1; shift 1;;
--qemu-img-options)
qemu_img_options=$2; shift 2;;
--logfile)
logfile=$2; shift 2;;
-o) outfile=$2; shift 2;;
-t) IFS="," read -a outtypes <<< "$2"; shift 2;;
-x) x=1; shift;;
--) shift ; break ;;
*) echo "Unknown option : $1"; exit 1;;
esac
done

# If --logfile was given, also direct stdout to that log file
if [ ! -z "$logfile" ]; then
exec > >(tee -a ${logfile})
fi

echo "*** fake-image-create: start"

echo "arguments:"
echo "----"
echo "$all_args"
echo $*
echo "----"

if [[ "${SHOULD_FAIL}" == 'true' ]]; then
@@ -77,21 +38,30 @@ if [[ "${BASE_IMAGE_FILE}" != "Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2"
exit 1
fi

if [ ! -z "$logfile" ]; then
echo " -> logfile: $logfile"
fi
if [ ! -z "$checksum" ]; then
echo " -> set --checksum"
fi
if [ ! -z "$no_tmpfs" ]; then
echo " -> set --no-tmpfs"
fi
if [ ! -z "$qemu_img_options" ]; then
echo " -> qemu-img-options: $qemu_img_options"
fi
if [ ! -z "$x" ]; then
echo " -> debugging enabled"
outfile=
outtypes=("qcow2")

TEMP=$(getopt -o xo:t: --long qemu-img-options:,no-tmpfs,checksum -- "$@")
if [ $? -ne 0 ]; then
echo "Invalid option"
exit 1
fi
eval set -- "$TEMP"
while true ; do
case "$1" in
--checksum)
echo " -> set --checksum"; shift 1;;
--no-tmpfs)
echo " -> set --no-tmpfs"; shift 1;;
--qemu-img-options)
echo " -> qemu-img-options: $2"; shift 2;;
-o) outfile=$2; shift 2;;
-t) IFS="," read -a outtypes <<< "$2"; shift 2;;
-x) echo " -> debugging enabled"; shift;;
--) shift ; break ;;
*) echo "Unknown option : $1"; exit 1;;
esac
done

if [ -z "$outfile" ]; then
echo "No output file specified."

+ 0
- 1
nodepool/tests/fixtures/config_validate/good.yaml View File

@@ -152,7 +152,6 @@ diskimages:
- cache-devstack
release: trusty
rebuild-age: 3600
build-timeout: 3600
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache

+ 0
- 26
nodepool/tests/fixtures/diskimage_build_timeout.yaml View File

@@ -1,26 +0,0 @@
elements-dir: .
images-dir: '{images_dir}'
build-log-dir: '{build_log_dir}'

zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}

labels: []

providers: []

diskimages:
- name: fake-image
formats:
- tar
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

+ 0
- 9
nodepool/tests/unit/test_builder.py View File

@@ -16,8 +16,6 @@
import os
import uuid
import fixtures
import mock
import subprocess

from nodepool import builder, exceptions, tests
from nodepool.driver.fake import provider as fakeprovider
@@ -337,10 +335,3 @@ class TestNodePoolBuilder(tests.DBTestCase):

self.assertEqual(build_default._formats, ['qcow2'])
self.assertEqual(build_vhd._formats, ['vhd'])

@mock.patch.object(subprocess.Popen, 'wait')
def test_diskimage_build_timeout(self, mock_wait):
mock_wait.side_effect = subprocess.TimeoutExpired('dib_cmd', 1)
configfile = self.setup_config('diskimage_build_timeout.yaml')
self.useBuilder(configfile, cleanup_interval=0)
self.waitForBuild('fake-image', '0000000001', states=(zk.FAILED,))

+ 0
- 6
releasenotes/notes/build-timeout-bb68a1fd24f97a10.yaml View File

@@ -1,6 +0,0 @@
---
features:
- |
A new option (build-timeout) has been added to the builder diskimage
configuration to control how long (in seconds) the builder should wait
for image builds before giving up. The default is 8 hours.

Loading…
Cancel
Save