From 0be1be779a27d7bb3ba8f5469e391e4c72eee685 Mon Sep 17 00:00:00 2001 From: Kevin Carter Date: Thu, 11 Jul 2019 17:54:39 -0500 Subject: [PATCH] Ensure we're raising proper exceptions This change ensures our use of the `process.execute` method from "oslo_concurrency" always checks the return codes of a given command. While the library is assumed to do this already, this change enforces our expected behavior. We're also changing our use of futures to return when there's an exception. The build_all method was blocking until all jobs were completed. Blocking on pipeline completion results in us masking exceptions that may be raised duing the job execution. To ensure we're capturing errors in our build process the wait function now return on the first exception; More on the futures constants can be seen here:[0]. The return values will now be evaluated to ensure all jobs completed successfully. Unit tests have been added to ensure we're not breaking the build all method and that it raise appropriate exceptions when errors are encountered. [0] - https://docs.python.org/3/library/concurrent.futures.html#module-functions Related-Bug: #1836265 Change-Id: Ia05140142fa59e5b252cd92801244e4fc02f4bbc Signed-off-by: Kevin Carter --- tripleo_common/constants.py | 4 +- tripleo_common/image/builder/buildah.py | 30 +++++- .../tests/image/builder/test_buildah.py | 102 +++++++++++++++++- 3 files changed, 130 insertions(+), 6 deletions(-) diff --git a/tripleo_common/constants.py b/tripleo_common/constants.py index 2779711f2..3c24a78e4 100644 --- a/tripleo_common/constants.py +++ b/tripleo_common/constants.py @@ -69,8 +69,8 @@ LOCAL_CACERT_PATH = '/etc/pki/ca-trust/source/anchors/cm-local-ca.pem' # Swift via SwiftPlanStorageBackend to identify them from other containers TRIPLEO_META_USAGE_KEY = 'x-container-meta-usage-tripleo' -# 30 minutes maximum to build the child layers at the same time. -BUILD_TIMEOUT = 1800 +# 45 minutes maximum to build the child layers at the same time. +BUILD_TIMEOUT = 2700 #: List of names of parameters that contain passwords PASSWORD_PARAMETER_NAMES = ( diff --git a/tripleo_common/image/builder/buildah.py b/tripleo_common/image/builder/buildah.py index 9818ffec7..d9c2f6c80 100644 --- a/tripleo_common/image/builder/buildah.py +++ b/tripleo_common/image/builder/buildah.py @@ -126,7 +126,12 @@ class BuildahBuilder(base.BaseBuilder): logfile, '-t', destination, container_build_path] print("Building %s image with: %s" % (container_name, ' '.join(args))) - process.execute(*args, run_as_root=False, use_standard_locale=True) + process.execute( + *args, + check_exit_code=True, + run_as_root=False, + use_standard_locale=True + ) def push(self, destination): """Push an image to a container registry. @@ -153,6 +158,7 @@ class BuildahBuilder(base.BaseBuilder): if deps is None: deps = self.deps + if isinstance(deps, (list,)): # Only a list of images can be multi-processed because they # are the last layer to build. Otherwise we could have issues @@ -165,8 +171,26 @@ class BuildahBuilder(base.BaseBuilder): future_to_build = {executor.submit(self.build_all, container): container for container in deps} - futures.wait(future_to_build, timeout=self.build_timeout, - return_when=futures.ALL_COMPLETED) + done, not_done = futures.wait( + future_to_build, + timeout=self.build_timeout, + return_when=futures.FIRST_EXCEPTION + ) + # NOTE(cloudnull): Once the job has been completed all completed + # jobs are checked for exceptions. If any jobs + # failed a SystemError will be raised using the + # exception information. If any job was loaded + # but not executed a SystemError will be raised. + for job in done: + if job._exception: + raise SystemError(job.exception_info) + else: + if not_done: + raise SystemError( + 'The following jobs were incomplete: {}'.format( + not_done + ) + ) elif isinstance(deps, (dict,)): for container in deps: self._generate_container(container) diff --git a/tripleo_common/tests/image/builder/test_buildah.py b/tripleo_common/tests/image/builder/test_buildah.py index a277550f8..8de989478 100644 --- a/tripleo_common/tests/image/builder/test_buildah.py +++ b/tripleo_common/tests/image/builder/test_buildah.py @@ -17,6 +17,9 @@ import copy import mock +from concurrent import futures +from concurrent.futures import ThreadPoolExecutor as tpe + from tripleo_common.image.builder.buildah import BuildahBuilder as bb from tripleo_common.tests import base from tripleo_common.utils import process @@ -25,6 +28,54 @@ from tripleo_common.utils import process BUILDAH_CMD_BASE = ['sudo', 'buildah'] DEPS = {"base"} WORK_DIR = '/tmp/kolla' +BUILD_ALL_LIST_CONTAINERS = ['container1', 'container2', 'container3'] +BUILD_ALL_DICT_CONTAINERS = { + 'container1': {}, + 'container2': {}, + 'container3': {} +} +BUILD_ALL_STR_CONTAINER = 'container1' + + +class ThreadPoolExecutorReturn(object): + pass + + +class ThreadPoolExecutorReturnFailed(object): + _exception = True + exception_info = "This is a test failure" + + +class ThreadPoolExecutorReturnSuccess(object): + _exception = False + + +# Return values for the ThreadPoolExecutor +R_FAILED = ( + ( + ThreadPoolExecutorReturnSuccess, + ThreadPoolExecutorReturnSuccess, + ThreadPoolExecutorReturnFailed + ), + set() +) +R_OK = ( + ( + ThreadPoolExecutorReturnSuccess, + ThreadPoolExecutorReturnSuccess, + ThreadPoolExecutorReturnSuccess + ), + set() +) +R_BROKEN = ( + ( + ThreadPoolExecutorReturnSuccess, + ), + ( + ThreadPoolExecutorReturn, + ThreadPoolExecutorReturn + ) +) class TestBuildahBuilder(base.TestCase): @@ -40,7 +91,10 @@ class TestBuildahBuilder(base.TestCase): args.extend(buildah_cmd_build) bb(WORK_DIR, DEPS).build('fedora-base', container_build_path) mock_process.assert_called_once_with( - *args, run_as_root=False, use_standard_locale=True + *args, + check_exit_code=True, + run_as_root=False, + use_standard_locale=True ) @mock.patch.object(process, 'execute', autospec=True) @@ -74,3 +128,49 @@ class TestBuildahBuilder(base.TestCase): builder._generate_container(container_name) mock_build.assert_called_once_with(builder, container_name, "") assert not mock_push.called + + @mock.patch.object(tpe, 'submit', autospec=True) + @mock.patch.object(futures, 'wait', autospec=True, return_value=R_BROKEN) + @mock.patch.object(process, 'execute', autospec=True) + def test_build_all_list_broken(self, mock_submit, mock_wait, mock_build): + _b = bb(WORK_DIR, DEPS) + self.assertRaises( + SystemError, + _b.build_all, + deps=BUILD_ALL_LIST_CONTAINERS + ) + + @mock.patch.object(tpe, 'submit', autospec=True) + @mock.patch.object(futures, 'wait', autospec=True, return_value=R_FAILED) + @mock.patch.object(process, 'execute', autospec=True) + def test_build_all_list_failed(self, mock_submit, mock_wait, mock_build): + _b = bb(WORK_DIR, DEPS) + self.assertRaises( + SystemError, + _b.build_all, + deps=BUILD_ALL_LIST_CONTAINERS + ) + + @mock.patch.object(tpe, 'submit', autospec=True) + @mock.patch.object(futures, 'wait', autospec=True, return_value=R_OK) + @mock.patch.object(process, 'execute', autospec=True) + def test_build_all_list_ok(self, mock_submit, mock_wait, mock_build): + bb(WORK_DIR, DEPS).build_all(deps=BUILD_ALL_LIST_CONTAINERS) + + @mock.patch.object(tpe, 'submit', autospec=True) + @mock.patch.object(futures, 'wait', autospec=True, return_value=R_OK) + @mock.patch.object(process, 'execute', autospec=True) + def test_build_all_ok_no_deps(self, mock_submit, mock_wait, mock_build): + bb(WORK_DIR, DEPS).build_all() + + @mock.patch.object(tpe, 'submit', autospec=True) + @mock.patch.object(futures, 'wait', autospec=True, return_value=R_OK) + @mock.patch.object(process, 'execute', autospec=True) + def test_build_all_dict_ok(self, mock_submit, mock_wait, mock_build): + bb(WORK_DIR, DEPS).build_all(deps=BUILD_ALL_DICT_CONTAINERS) + + @mock.patch.object(tpe, 'submit', autospec=True) + @mock.patch.object(futures, 'wait', autospec=True, return_value=R_OK) + @mock.patch.object(process, 'execute', autospec=True) + def test_build_all_str_ok(self, mock_submit, mock_wait, mock_build): + bb(WORK_DIR, DEPS).build_all(deps=BUILD_ALL_STR_CONTAINER)