From e2dac0ef415c51792f1f779c05a9c439ae5f5265 Mon Sep 17 00:00:00 2001 From: Steve Baker Date: Wed, 31 Jan 2018 13:45:13 +1300 Subject: [PATCH] Make container image upload more resilient This change implements a retry loop for the image push, just like the existing one for pull. This will be necessary for some non-undercloud registries which might periodically fail on some pushes. This change also determines the upload worker count to be half of the CPU count, with a minimum of two. This is to make CI more resilient on smaller (6 core) flavors. A docker pull causes high CPU load in dockerd, as the download of each layer is spread across dockerd's workers. Change-Id: Ia30658e3283d4b69d2bd8b0dddd375e1918169d3 Closes-Bug: #1746305 --- tripleo_common/image/image_uploader.py | 34 ++++++++++++++-- .../tests/image/test_image_uploader.py | 39 +++++++++++++++++++ 2 files changed, 69 insertions(+), 4 deletions(-) diff --git a/tripleo_common/image/image_uploader.py b/tripleo_common/image/image_uploader.py index 1827a8549..f73d46669 100644 --- a/tripleo_common/image/image_uploader.py +++ b/tripleo_common/image/image_uploader.py @@ -31,6 +31,7 @@ try: from docker import APIClient as Client except ImportError: from docker import Client +from oslo_concurrency import processutils from tripleo_common.image.base import BaseImageManager from tripleo_common.image.exception import ImageUploaderException @@ -179,9 +180,7 @@ class DockerImageUploader(ImageUploader): tag=tag, force=True) LOG.debug(response) - response = [line for line in dockerc.push(new_repo, - tag=tag, stream=True)] - LOG.debug(response) + DockerImageUploader._push_retry(dockerc, new_repo, tag=tag) LOG.info('Completed upload for image %s' % image_name) return full_image, full_new_repo @@ -211,6 +210,31 @@ class DockerImageUploader(ImageUploader): time.sleep(3) LOG.warning('retrying pulling image: %s' % image) + @staticmethod + def _push(dockerc, image, tag=None): + LOG.debug('Pushing %s' % image) + + for line in dockerc.push(image, tag=tag, stream=True): + status = json.loads(line) + if 'error' in status: + LOG.warning('docker push failed: %s' % status['error']) + return 1 + LOG.debug(status.get('status')) + return 0 + + @staticmethod + def _push_retry(dockerc, image, tag=None): + retval = -1 + count = 0 + while retval != 0: + if count >= 5: + raise ImageUploaderException('Could not push image %s' % image) + count += 1 + retval = DockerImageUploader._push(dockerc, image, tag) + if retval != 0: + time.sleep(3) + LOG.warning('retrying pushing image: %s' % image) + @staticmethod def _images_match(image1, image2, insecure_registries): try: @@ -360,7 +384,9 @@ class DockerImageUploader(ImageUploader): result = self.upload_image(*first) local_images.extend(result) - p = multiprocessing.Pool(4) + # workers will be half the CPU count, to a minimum of 2 + workers = max(2, processutils.get_worker_count() // 2) + p = multiprocessing.Pool(workers) for result in p.map(docker_upload, self.upload_tasks): local_images.extend(result) diff --git a/tripleo_common/tests/image/test_image_uploader.py b/tripleo_common/tests/image/test_image_uploader.py index 9d525ff49..43e3ab2c2 100644 --- a/tripleo_common/tests/image/test_image_uploader.py +++ b/tripleo_common/tests/image/test_image_uploader.py @@ -427,6 +427,45 @@ class TestDockerImageUploader(base.TestCase): mock.call(image, tag=None, stream=True) ]) + @mock.patch('time.sleep') + def test_push_retry(self, sleep_mock): + image = 'docker.io/tripleoupstream/heat-docker-agents-centos' + + dockerc = self.dockermock.return_value + dockerc.push.side_effect = [ + ['{"error": "ouch"}'], + ['{"error": "ouch"}'], + ['{"error": "ouch"}'], + ['{"error": "ouch"}'], + ['{"status": "done"}'] + ] + self.uploader._push_retry(dockerc, image) + + self.assertEqual(sleep_mock.call_count, 4) + dockerc.push.assert_has_calls([ + mock.call(image, tag=None, stream=True) + ]) + + @mock.patch('time.sleep') + def test_push_retry_failure(self, sleep_mock): + image = 'docker.io/tripleoupstream/heat-docker-agents-centos' + + dockerc = self.dockermock.return_value + dockerc.push.side_effect = [ + ['{"error": "ouch"}'], + ['{"error": "ouch"}'], + ['{"error": "ouch"}'], + ['{"error": "ouch"}'], + ['{"error": "ouch"}'], + ] + self.assertRaises(ImageUploaderException, + self.uploader._push_retry, dockerc, image) + + self.assertEqual(sleep_mock.call_count, 5) + dockerc.push.assert_has_calls([ + mock.call(image, tag=None, stream=True) + ]) + @mock.patch('tripleo_common.image.image_uploader.' 'DockerImageUploader._inspect') def test_images_match(self, mock_inspect):