Browse Source

Revert "Add a timeout for the image build"

This reverts commit 7225354ec0.

The disk-image-create command does not appear to be starting.

Change-Id: I81abe25a253a385cae08a57561129a678546f18f
tags/3.5.0
David Shrewsbury 6 months ago
parent
commit
ccf40a462a

+ 0
- 6
doc/source/configuration.rst View File

@@ -277,12 +277,6 @@ Options
277 277
       Specifies the distro to be used as a base image to build the image using
278 278
       diskimage-builder.
279 279
 
280
-   .. attr:: build-timeout
281
-      :type: int
282
-
283
-      How long (in seconds) to wait for the diskimage build before giving up.
284
-      The default is 8 hours.
285
-
286 280
    .. attr:: elements
287 281
       :type: list
288 282
 

+ 17
- 28
nodepool/builder.py View File

@@ -739,28 +739,21 @@ class BuildWorker(BaseWorker):
739 739
         if 'qcow2' in img_types:
740 740
             qemu_img_options = DEFAULT_QEMU_IMAGE_COMPAT_OPTIONS
741 741
 
742
-        log_fn = self._getBuildLog(diskimage.name, build_id)
743
-
744
-        cmd = ('%s -x -t %s --checksum --no-tmpfs %s -o %s --logfile %s %s' %
742
+        cmd = ('%s -x -t %s --checksum --no-tmpfs %s -o %s %s' %
745 743
                (self.dib_cmd, img_types, qemu_img_options, filename,
746
-                log_fn, img_elements))
744
+                img_elements))
747 745
 
748 746
         self._pruneBuildLogs(diskimage.name)
747
+        log_fn = self._getBuildLog(diskimage.name, build_id)
749 748
 
750 749
         self.log.info('Running %s' % (cmd,))
751 750
         self.log.info('Logging to %s' % (log_fn,))
752 751
 
753 752
         start_time = time.monotonic()
754
-
755
-        # We used to use readline() on stdout to output the lines to the
756
-        # build log. Unfortunately, this would block as long as the process
757
-        # ran (with no easy way to timeout the read) and wedge the builder.
758
-        # Now we use --logfile option to the dib command and set a timeout
759
-        # on the wait() call to prevent the wedge.
760
-        did_timeout = False
761 753
         try:
762 754
             p = subprocess.Popen(
763 755
                 shlex.split(cmd),
756
+                stdout=subprocess.PIPE,
764 757
                 stderr=subprocess.STDOUT,
765 758
                 env=env)
766 759
         except OSError as e:
@@ -768,20 +761,17 @@ class BuildWorker(BaseWorker):
768 761
                 "Failed to exec '%s'. Error: '%s'" % (cmd, e.strerror)
769 762
             )
770 763
 
771
-        try:
772
-            rc = p.wait(timeout=diskimage.build_timeout)
773
-        except subprocess.TimeoutExpired:
774
-            p.kill()
775
-            did_timeout = True
776
-            rc = 1
777
-            self.log.error(
778
-                "Build timeout for image %s, build %s (log: %s)",
779
-                diskimage.name, build_id, log_fn)
780
-        else:
781
-            # Append return code to dib's log file
782
-            with open(log_fn, 'ab') as log:
783
-                m = "Exit code: %s\n" % rc
784
-                log.write(m.encode('utf8'))
764
+        with open(log_fn, 'wb') as log:
765
+            while True:
766
+                ln = p.stdout.readline()
767
+                log.write(ln)
768
+                log.flush()
769
+                if not ln:
770
+                    break
771
+
772
+            rc = p.wait()
773
+            m = "Exit code: %s\n" % rc
774
+            log.write(m.encode('utf8'))
785 775
 
786 776
         # It's possible the connection to the ZK cluster could have been
787 777
         # interrupted during the build. If so, wait for it to return.
@@ -806,10 +796,9 @@ class BuildWorker(BaseWorker):
806 796
             self.log.info("ZooKeeper lost while building %s" % diskimage.name)
807 797
             self._zk.resetLostFlag()
808 798
             build_data.state = zk.FAILED
809
-        elif p.returncode or did_timeout:
799
+        elif p.returncode:
810 800
             self.log.info(
811
-                "DIB failed creating %s (%s) (timeout=%s)" % (
812
-                    diskimage.name, p.returncode, did_timeout))
801
+                "DIB failed creating %s (%s)" % (diskimage.name, p.returncode))
813 802
             build_data.state = zk.FAILED
814 803
         else:
815 804
             self.log.info("DIB image %s is built" % diskimage.name)

+ 0
- 1
nodepool/cmd/config_validator.py View File

@@ -44,7 +44,6 @@ class ConfigValidator:
44 44
             'rebuild-age': int,
45 45
             'env-vars': {str: str},
46 46
             'username': str,
47
-            'build-timeout': int,
48 47
         }
49 48
 
50 49
         webapp = {

+ 1
- 4
nodepool/config.py View File

@@ -118,7 +118,6 @@ class Config(ConfigValue):
118 118
             d.image_types = set(diskimage.get('formats', []))
119 119
             d.pause = bool(diskimage.get('pause', False))
120 120
             d.username = diskimage.get('username', 'zuul')
121
-            d.build_timeout = diskimage.get('build-timeout', (8 * 60 * 60))
122 121
             self.diskimages[d.name] = d
123 122
 
124 123
     def setSecureDiskimageEnv(self, diskimages, secure_config_path):
@@ -180,7 +179,6 @@ class DiskImage(ConfigValue):
180 179
         self.image_types = None
181 180
         self.pause = False
182 181
         self.username = None
183
-        self.build_timeout = None
184 182
 
185 183
     def __eq__(self, other):
186 184
         if isinstance(other, DiskImage):
@@ -191,8 +189,7 @@ class DiskImage(ConfigValue):
191 189
                     other.env_vars == self.env_vars and
192 190
                     other.image_types == self.image_types and
193 191
                     other.pause == self.pause and
194
-                    other.username == self.username and
195
-                    other.build_timeout == self.build_timeout)
192
+                    other.username == self.username)
196 193
         return False
197 194
 
198 195
     def __repr__(self):

+ 7
- 12
nodepool/tests/__init__.py View File

@@ -410,25 +410,20 @@ class DBTestCase(BaseTestCase):
410 410
             time.sleep(1)
411 411
         self.wait_for_threads()
412 412
 
413
-    def waitForBuild(self, image_name, build_id, states=None):
414
-        if states is None:
415
-            states = (zk.READY,)
416
-
413
+    def waitForBuild(self, image_name, build_id):
417 414
         base = "-".join([image_name, build_id])
418
-
419 415
         while True:
420 416
             self.wait_for_threads()
421
-            build = self.zk.getBuild(image_name, build_id)
422
-            if build and build.state in states:
417
+            files = builder.DibImageFile.from_image_id(
418
+                self._config_images_dir.path, base)
419
+            if files:
423 420
                 break
424 421
             time.sleep(1)
425 422
 
426
-        # We should only expect a dib manifest with a successful build.
427
-        while build.state == zk.READY:
423
+        while True:
428 424
             self.wait_for_threads()
429
-            files = builder.DibImageFile.from_image_id(
430
-                self._config_images_dir.path, base)
431
-            if files:
425
+            build = self.zk.getBuild(image_name, build_id)
426
+            if build and build.state == zk.READY:
432 427
                 break
433 428
             time.sleep(1)
434 429
 

+ 24
- 54
nodepool/tests/fake-image-create View File

@@ -1,49 +1,10 @@
1 1
 #!/bin/bash
2 2
 
3
-outfile=
4
-outtypes=("qcow2")
5
-
6
-all_args=$*
7
-logfile=
8
-checksum=
9
-no_tmpfs=
10
-qemu_img_options=
11
-x=
12
-
13
-TEMP=$(getopt -o xo:t: --long qemu-img-options:,no-tmpfs,checksum,logfile: -- "$@")
14
-if [ $? -ne 0 ]; then
15
-    echo "Invalid option"
16
-    exit 1
17
-fi
18
-eval set -- "$TEMP"
19
-while true ; do
20
-    case "$1" in
21
-        --checksum)
22
-            checksum=1; shift 1;;
23
-        --no-tmpfs)
24
-            no_tmpfs=1; shift 1;;
25
-        --qemu-img-options)
26
-            qemu_img_options=$2; shift 2;;
27
-        --logfile)
28
-            logfile=$2; shift 2;;
29
-        -o) outfile=$2; shift 2;;
30
-        -t) IFS="," read -a outtypes <<< "$2"; shift 2;;
31
-        -x) x=1; shift;;
32
-        --) shift ; break ;;
33
-        *) echo "Unknown option : $1"; exit 1;;
34
-    esac
35
-done
36
-
37
-# If --logfile was given, direct stdout to it, as well
38
-if [ ! -z "$logfile" ]; then
39
-    exec > >(tee -a ${logfile})
40
-fi
41
-
42 3
 echo "*** fake-image-create: start"
43 4
 
44 5
 echo "arguments:"
45 6
 echo "----"
46
-echo "$all_args"
7
+echo $*
47 8
 echo "----"
48 9
 
49 10
 if [[ "${SHOULD_FAIL}" == 'true' ]]; then
@@ -77,21 +38,30 @@ if [[ "${BASE_IMAGE_FILE}" != "Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2"
77 38
     exit 1
78 39
 fi
79 40
 
80
-if [ ! -z "$logfile" ]; then
81
-    echo " -> logfile: $logfile"
82
-fi
83
-if [ ! -z "$checksum" ]; then
84
-    echo " -> set --checksum"
85
-fi
86
-if [ ! -z "$no_tmpfs" ]; then
87
-    echo " -> set --no-tmpfs"
88
-fi
89
-if [ ! -z "$qemu_img_options" ]; then
90
-    echo " -> qemu-img-options: $qemu_img_options"
91
-fi
92
-if [ ! -z "$x" ]; then
93
-    echo " -> debugging enabled"
41
+outfile=
42
+outtypes=("qcow2")
43
+
44
+TEMP=$(getopt -o xo:t: --long qemu-img-options:,no-tmpfs,checksum -- "$@")
45
+if [ $? -ne 0 ]; then
46
+    echo "Invalid option"
47
+    exit 1
94 48
 fi
49
+eval set -- "$TEMP"
50
+while true ; do
51
+    case "$1" in
52
+        --checksum)
53
+            echo " -> set --checksum"; shift 1;;
54
+        --no-tmpfs)
55
+            echo " -> set --no-tmpfs"; shift 1;;
56
+        --qemu-img-options)
57
+            echo " -> qemu-img-options: $2"; shift 2;;
58
+        -o) outfile=$2; shift 2;;
59
+        -t) IFS="," read -a outtypes <<< "$2"; shift 2;;
60
+        -x) echo " -> debugging enabled"; shift;;
61
+        --) shift ; break ;;
62
+        *) echo "Unknown option : $1"; exit 1;;
63
+    esac
64
+done
95 65
 
96 66
 if [ -z "$outfile" ]; then
97 67
     echo "No output file specified."

+ 0
- 1
nodepool/tests/fixtures/config_validate/good.yaml View File

@@ -130,7 +130,6 @@ diskimages:
130 130
       - cache-devstack
131 131
     release: trusty
132 132
     rebuild-age: 3600
133
-    build-timeout: 3600
134 133
     env-vars:
135 134
       TMPDIR: /opt/dib_tmp
136 135
       DIB_IMAGE_CACHE: /opt/dib_cache

+ 0
- 26
nodepool/tests/fixtures/diskimage_build_timeout.yaml View File

@@ -1,26 +0,0 @@
1
-elements-dir: .
2
-images-dir: '{images_dir}'
3
-build-log-dir: '{build_log_dir}'
4
-
5
-zookeeper-servers:
6
-  - host: {zookeeper_host}
7
-    port: {zookeeper_port}
8
-    chroot: {zookeeper_chroot}
9
-
10
-labels: []
11
-
12
-providers: []
13
-
14
-diskimages:
15
-  - name: fake-image
16
-    formats:
17
-      - tar
18
-    elements:
19
-      - fedora
20
-      - vm
21
-    release: 21
22
-    env-vars:
23
-      TMPDIR: /opt/dib_tmp
24
-      DIB_IMAGE_CACHE: /opt/dib_cache
25
-      DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
26
-      BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

+ 0
- 9
nodepool/tests/unit/test_builder.py View File

@@ -16,8 +16,6 @@
16 16
 import os
17 17
 import uuid
18 18
 import fixtures
19
-import mock
20
-import subprocess
21 19
 
22 20
 from nodepool import builder, exceptions, tests
23 21
 from nodepool.driver.fake import provider as fakeprovider
@@ -337,10 +335,3 @@ class TestNodePoolBuilder(tests.DBTestCase):
337 335
 
338 336
         self.assertEqual(build_default._formats, ['qcow2'])
339 337
         self.assertEqual(build_vhd._formats, ['vhd'])
340
-
341
-    @mock.patch.object(subprocess.Popen, 'wait')
342
-    def test_diskimage_build_timeout(self, mock_wait):
343
-        mock_wait.side_effect = subprocess.TimeoutExpired('dib_cmd', 1)
344
-        configfile = self.setup_config('diskimage_build_timeout.yaml')
345
-        self.useBuilder(configfile, cleanup_interval=0)
346
-        self.waitForBuild('fake-image', '0000000001', states=(zk.FAILED,))

+ 0
- 6
releasenotes/notes/build-timeout-bb68a1fd24f97a10.yaml View File

@@ -1,6 +0,0 @@
1
----
2
-features:
3
-  - |
4
-    A new option (build-timeout) has been added to the builder diskimage
5
-    configuration to control how long the builder should wait for image
6
-    builds before giving up. The default is 8 hours.

Loading…
Cancel
Save