From ccf40a462a4805c7162bc34434cfe718cd96c448 Mon Sep 17 00:00:00 2001 From: David Shrewsbury <shrewsbury.dave@gmail.com> Date: Fri, 25 Jan 2019 17:36:31 +0000 Subject: [PATCH] Revert "Add a timeout for the image build" This reverts commit 7225354ec04f0466d95b6d68e51002723d6529c9. The disk-image-create command does not appear to be starting. Change-Id: I81abe25a253a385cae08a57561129a678546f18f --- doc/source/configuration.rst | 6 -- nodepool/builder.py | 45 ++++------- nodepool/cmd/config_validator.py | 1 - nodepool/config.py | 5 +- nodepool/tests/__init__.py | 21 ++--- nodepool/tests/fake-image-create | 78 ++++++------------- .../tests/fixtures/config_validate/good.yaml | 1 - .../fixtures/diskimage_build_timeout.yaml | 26 ------- nodepool/tests/unit/test_builder.py | 9 --- .../notes/build-timeout-bb68a1fd24f97a10.yaml | 6 -- 10 files changed, 50 insertions(+), 148 deletions(-) delete mode 100644 nodepool/tests/fixtures/diskimage_build_timeout.yaml delete mode 100644 releasenotes/notes/build-timeout-bb68a1fd24f97a10.yaml diff --git a/doc/source/configuration.rst b/doc/source/configuration.rst index 7bec67054..27abacf86 100644 --- a/doc/source/configuration.rst +++ b/doc/source/configuration.rst @@ -277,12 +277,6 @@ Options Specifies the distro to be used as a base image to build the image using diskimage-builder. - .. attr:: build-timeout - :type: int - - How long (in seconds) to wait for the diskimage build before giving up. - The default is 8 hours. - .. attr:: elements :type: list diff --git a/nodepool/builder.py b/nodepool/builder.py index 16fe7148b..dce709e9e 100755 --- a/nodepool/builder.py +++ b/nodepool/builder.py @@ -739,28 +739,21 @@ class BuildWorker(BaseWorker): if 'qcow2' in img_types: qemu_img_options = DEFAULT_QEMU_IMAGE_COMPAT_OPTIONS - log_fn = self._getBuildLog(diskimage.name, build_id) - - cmd = ('%s -x -t %s --checksum --no-tmpfs %s -o %s --logfile %s %s' % + cmd = ('%s -x -t %s --checksum --no-tmpfs %s -o %s %s' % (self.dib_cmd, img_types, qemu_img_options, filename, - log_fn, img_elements)) + img_elements)) self._pruneBuildLogs(diskimage.name) + log_fn = self._getBuildLog(diskimage.name, build_id) self.log.info('Running %s' % (cmd,)) self.log.info('Logging to %s' % (log_fn,)) start_time = time.monotonic() - - # We used to use readline() on stdout to output the lines to the - # build log. Unfortunately, this would block as long as the process - # ran (with no easy way to timeout the read) and wedge the builder. - # Now we use --logfile option to the dib command and set a timeout - # on the wait() call to prevent the wedge. - did_timeout = False try: p = subprocess.Popen( shlex.split(cmd), + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=env) except OSError as e: @@ -768,20 +761,17 @@ class BuildWorker(BaseWorker): "Failed to exec '%s'. Error: '%s'" % (cmd, e.strerror) ) - try: - rc = p.wait(timeout=diskimage.build_timeout) - except subprocess.TimeoutExpired: - p.kill() - did_timeout = True - rc = 1 - self.log.error( - "Build timeout for image %s, build %s (log: %s)", - diskimage.name, build_id, log_fn) - else: - # Append return code to dib's log file - with open(log_fn, 'ab') as log: - m = "Exit code: %s\n" % rc - log.write(m.encode('utf8')) + with open(log_fn, 'wb') as log: + while True: + ln = p.stdout.readline() + log.write(ln) + log.flush() + if not ln: + break + + rc = p.wait() + m = "Exit code: %s\n" % rc + log.write(m.encode('utf8')) # It's possible the connection to the ZK cluster could have been # interrupted during the build. If so, wait for it to return. @@ -806,10 +796,9 @@ class BuildWorker(BaseWorker): self.log.info("ZooKeeper lost while building %s" % diskimage.name) self._zk.resetLostFlag() build_data.state = zk.FAILED - elif p.returncode or did_timeout: + elif p.returncode: self.log.info( - "DIB failed creating %s (%s) (timeout=%s)" % ( - diskimage.name, p.returncode, did_timeout)) + "DIB failed creating %s (%s)" % (diskimage.name, p.returncode)) build_data.state = zk.FAILED else: self.log.info("DIB image %s is built" % diskimage.name) diff --git a/nodepool/cmd/config_validator.py b/nodepool/cmd/config_validator.py index f2646af34..c3dde57d7 100644 --- a/nodepool/cmd/config_validator.py +++ b/nodepool/cmd/config_validator.py @@ -44,7 +44,6 @@ class ConfigValidator: 'rebuild-age': int, 'env-vars': {str: str}, 'username': str, - 'build-timeout': int, } webapp = { diff --git a/nodepool/config.py b/nodepool/config.py index 7f09961c7..2b5e4ebd4 100755 --- a/nodepool/config.py +++ b/nodepool/config.py @@ -118,7 +118,6 @@ class Config(ConfigValue): d.image_types = set(diskimage.get('formats', [])) d.pause = bool(diskimage.get('pause', False)) d.username = diskimage.get('username', 'zuul') - d.build_timeout = diskimage.get('build-timeout', (8 * 60 * 60)) self.diskimages[d.name] = d def setSecureDiskimageEnv(self, diskimages, secure_config_path): @@ -180,7 +179,6 @@ class DiskImage(ConfigValue): self.image_types = None self.pause = False self.username = None - self.build_timeout = None def __eq__(self, other): if isinstance(other, DiskImage): @@ -191,8 +189,7 @@ class DiskImage(ConfigValue): other.env_vars == self.env_vars and other.image_types == self.image_types and other.pause == self.pause and - other.username == self.username and - other.build_timeout == self.build_timeout) + other.username == self.username) return False def __repr__(self): diff --git a/nodepool/tests/__init__.py b/nodepool/tests/__init__.py index 376b4fb5f..4a8e83d91 100644 --- a/nodepool/tests/__init__.py +++ b/nodepool/tests/__init__.py @@ -410,21 +410,9 @@ class DBTestCase(BaseTestCase): time.sleep(1) self.wait_for_threads() - def waitForBuild(self, image_name, build_id, states=None): - if states is None: - states = (zk.READY,) - + def waitForBuild(self, image_name, build_id): base = "-".join([image_name, build_id]) - while True: - self.wait_for_threads() - build = self.zk.getBuild(image_name, build_id) - if build and build.state in states: - break - time.sleep(1) - - # We should only expect a dib manifest with a successful build. - while build.state == zk.READY: self.wait_for_threads() files = builder.DibImageFile.from_image_id( self._config_images_dir.path, base) @@ -432,6 +420,13 @@ class DBTestCase(BaseTestCase): break time.sleep(1) + while True: + self.wait_for_threads() + build = self.zk.getBuild(image_name, build_id) + if build and build.state == zk.READY: + break + time.sleep(1) + self.wait_for_threads() return build diff --git a/nodepool/tests/fake-image-create b/nodepool/tests/fake-image-create index 1a2406fd2..9d236ff16 100755 --- a/nodepool/tests/fake-image-create +++ b/nodepool/tests/fake-image-create @@ -1,49 +1,10 @@ #!/bin/bash -outfile= -outtypes=("qcow2") - -all_args=$* -logfile= -checksum= -no_tmpfs= -qemu_img_options= -x= - -TEMP=$(getopt -o xo:t: --long qemu-img-options:,no-tmpfs,checksum,logfile: -- "$@") -if [ $? -ne 0 ]; then - echo "Invalid option" - exit 1 -fi -eval set -- "$TEMP" -while true ; do - case "$1" in - --checksum) - checksum=1; shift 1;; - --no-tmpfs) - no_tmpfs=1; shift 1;; - --qemu-img-options) - qemu_img_options=$2; shift 2;; - --logfile) - logfile=$2; shift 2;; - -o) outfile=$2; shift 2;; - -t) IFS="," read -a outtypes <<< "$2"; shift 2;; - -x) x=1; shift;; - --) shift ; break ;; - *) echo "Unknown option : $1"; exit 1;; - esac -done - -# If --logfile was given, direct stdout to it, as well -if [ ! -z "$logfile" ]; then - exec > >(tee -a ${logfile}) -fi - echo "*** fake-image-create: start" echo "arguments:" echo "----" -echo "$all_args" +echo $* echo "----" if [[ "${SHOULD_FAIL}" == 'true' ]]; then @@ -77,21 +38,30 @@ if [[ "${BASE_IMAGE_FILE}" != "Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2" exit 1 fi -if [ ! -z "$logfile" ]; then - echo " -> logfile: $logfile" -fi -if [ ! -z "$checksum" ]; then - echo " -> set --checksum" -fi -if [ ! -z "$no_tmpfs" ]; then - echo " -> set --no-tmpfs" -fi -if [ ! -z "$qemu_img_options" ]; then - echo " -> qemu-img-options: $qemu_img_options" -fi -if [ ! -z "$x" ]; then - echo " -> debugging enabled" +outfile= +outtypes=("qcow2") + +TEMP=$(getopt -o xo:t: --long qemu-img-options:,no-tmpfs,checksum -- "$@") +if [ $? -ne 0 ]; then + echo "Invalid option" + exit 1 fi +eval set -- "$TEMP" +while true ; do + case "$1" in + --checksum) + echo " -> set --checksum"; shift 1;; + --no-tmpfs) + echo " -> set --no-tmpfs"; shift 1;; + --qemu-img-options) + echo " -> qemu-img-options: $2"; shift 2;; + -o) outfile=$2; shift 2;; + -t) IFS="," read -a outtypes <<< "$2"; shift 2;; + -x) echo " -> debugging enabled"; shift;; + --) shift ; break ;; + *) echo "Unknown option : $1"; exit 1;; + esac +done if [ -z "$outfile" ]; then echo "No output file specified." diff --git a/nodepool/tests/fixtures/config_validate/good.yaml b/nodepool/tests/fixtures/config_validate/good.yaml index fc5993741..db7d1ecfd 100644 --- a/nodepool/tests/fixtures/config_validate/good.yaml +++ b/nodepool/tests/fixtures/config_validate/good.yaml @@ -130,7 +130,6 @@ diskimages: - cache-devstack release: trusty rebuild-age: 3600 - build-timeout: 3600 env-vars: TMPDIR: /opt/dib_tmp DIB_IMAGE_CACHE: /opt/dib_cache diff --git a/nodepool/tests/fixtures/diskimage_build_timeout.yaml b/nodepool/tests/fixtures/diskimage_build_timeout.yaml deleted file mode 100644 index 01770bddd..000000000 --- a/nodepool/tests/fixtures/diskimage_build_timeout.yaml +++ /dev/null @@ -1,26 +0,0 @@ -elements-dir: . -images-dir: '{images_dir}' -build-log-dir: '{build_log_dir}' - -zookeeper-servers: - - host: {zookeeper_host} - port: {zookeeper_port} - chroot: {zookeeper_chroot} - -labels: [] - -providers: [] - -diskimages: - - name: fake-image - formats: - - tar - elements: - - fedora - - vm - release: 21 - env-vars: - TMPDIR: /opt/dib_tmp - DIB_IMAGE_CACHE: /opt/dib_cache - DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/ - BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2 diff --git a/nodepool/tests/unit/test_builder.py b/nodepool/tests/unit/test_builder.py index cfea4b855..c992bf7aa 100644 --- a/nodepool/tests/unit/test_builder.py +++ b/nodepool/tests/unit/test_builder.py @@ -16,8 +16,6 @@ import os import uuid import fixtures -import mock -import subprocess from nodepool import builder, exceptions, tests from nodepool.driver.fake import provider as fakeprovider @@ -337,10 +335,3 @@ class TestNodePoolBuilder(tests.DBTestCase): self.assertEqual(build_default._formats, ['qcow2']) self.assertEqual(build_vhd._formats, ['vhd']) - - @mock.patch.object(subprocess.Popen, 'wait') - def test_diskimage_build_timeout(self, mock_wait): - mock_wait.side_effect = subprocess.TimeoutExpired('dib_cmd', 1) - configfile = self.setup_config('diskimage_build_timeout.yaml') - self.useBuilder(configfile, cleanup_interval=0) - self.waitForBuild('fake-image', '0000000001', states=(zk.FAILED,)) diff --git a/releasenotes/notes/build-timeout-bb68a1fd24f97a10.yaml b/releasenotes/notes/build-timeout-bb68a1fd24f97a10.yaml deleted file mode 100644 index 6901903db..000000000 --- a/releasenotes/notes/build-timeout-bb68a1fd24f97a10.yaml +++ /dev/null @@ -1,6 +0,0 @@ ---- -features: - - | - A new option (build-timeout) has been added to the builder diskimage - configuration to control how long the builder should wait for image - builds before giving up. The default is 8 hours.