Make container image upload more resilient

This change implements a retry loop for the image push, just like the
existing one for pull. This will be necessary for some non-undercloud
registries which might periodically fail on some pushes.

This change also determines the upload worker count to be half of the
CPU count, with a minimum of two. This is to make CI more resilient on
smaller (6 core) flavors. A docker pull causes high CPU load in
dockerd, as the download of each layer is spread across dockerd's
workers.

Change-Id: Ia30658e3283d4b69d2bd8b0dddd375e1918169d3
Closes-Bug: #1746305
This commit is contained in:
Steve Baker 2018-01-31 13:45:13 +13:00
parent 3757a44a30
commit e2dac0ef41
2 changed files with 69 additions and 4 deletions

View File

@ -31,6 +31,7 @@ try:
from docker import APIClient as Client
except ImportError:
from docker import Client
from oslo_concurrency import processutils
from tripleo_common.image.base import BaseImageManager
from tripleo_common.image.exception import ImageUploaderException
@ -179,9 +180,7 @@ class DockerImageUploader(ImageUploader):
tag=tag, force=True)
LOG.debug(response)
response = [line for line in dockerc.push(new_repo,
tag=tag, stream=True)]
LOG.debug(response)
DockerImageUploader._push_retry(dockerc, new_repo, tag=tag)
LOG.info('Completed upload for image %s' % image_name)
return full_image, full_new_repo
@ -211,6 +210,31 @@ class DockerImageUploader(ImageUploader):
time.sleep(3)
LOG.warning('retrying pulling image: %s' % image)
@staticmethod
def _push(dockerc, image, tag=None):
LOG.debug('Pushing %s' % image)
for line in dockerc.push(image, tag=tag, stream=True):
status = json.loads(line)
if 'error' in status:
LOG.warning('docker push failed: %s' % status['error'])
return 1
LOG.debug(status.get('status'))
return 0
@staticmethod
def _push_retry(dockerc, image, tag=None):
retval = -1
count = 0
while retval != 0:
if count >= 5:
raise ImageUploaderException('Could not push image %s' % image)
count += 1
retval = DockerImageUploader._push(dockerc, image, tag)
if retval != 0:
time.sleep(3)
LOG.warning('retrying pushing image: %s' % image)
@staticmethod
def _images_match(image1, image2, insecure_registries):
try:
@ -360,7 +384,9 @@ class DockerImageUploader(ImageUploader):
result = self.upload_image(*first)
local_images.extend(result)
p = multiprocessing.Pool(4)
# workers will be half the CPU count, to a minimum of 2
workers = max(2, processutils.get_worker_count() // 2)
p = multiprocessing.Pool(workers)
for result in p.map(docker_upload, self.upload_tasks):
local_images.extend(result)

View File

@ -427,6 +427,45 @@ class TestDockerImageUploader(base.TestCase):
mock.call(image, tag=None, stream=True)
])
@mock.patch('time.sleep')
def test_push_retry(self, sleep_mock):
image = 'docker.io/tripleoupstream/heat-docker-agents-centos'
dockerc = self.dockermock.return_value
dockerc.push.side_effect = [
['{"error": "ouch"}'],
['{"error": "ouch"}'],
['{"error": "ouch"}'],
['{"error": "ouch"}'],
['{"status": "done"}']
]
self.uploader._push_retry(dockerc, image)
self.assertEqual(sleep_mock.call_count, 4)
dockerc.push.assert_has_calls([
mock.call(image, tag=None, stream=True)
])
@mock.patch('time.sleep')
def test_push_retry_failure(self, sleep_mock):
image = 'docker.io/tripleoupstream/heat-docker-agents-centos'
dockerc = self.dockermock.return_value
dockerc.push.side_effect = [
['{"error": "ouch"}'],
['{"error": "ouch"}'],
['{"error": "ouch"}'],
['{"error": "ouch"}'],
['{"error": "ouch"}'],
]
self.assertRaises(ImageUploaderException,
self.uploader._push_retry, dockerc, image)
self.assertEqual(sleep_mock.call_count, 5)
dockerc.push.assert_has_calls([
mock.call(image, tag=None, stream=True)
])
@mock.patch('tripleo_common.image.image_uploader.'
'DockerImageUploader._inspect')
def test_images_match(self, mock_inspect):