Fix image build event race and add more image api tests
This change does two things: 1) It adds tests that verify that in the case where images are managed in a central repo, admins may not use the web api of the wrong tenant (ie, a tenant that is not actually managing the builds of these images) to trigger or delete a build or upload. In expanding the tests to include more than one tenant, a previously existing race condition was more reliably observed, so it is fixed: 2) The following race sequence could happen: [1] launcher triggers builds for 2 missing images [2] scheduler begins reporting the image buildset (2 jobs) [2] scheduler adds image build artifact #1 to registry [1] launcher receives signal from zk watch that the image registry was updated and checks for missing images [1] launcher observes that image #2 is still missing and submits an image build trigger [2] scheduler adds image build artifact #2 to registry [2] scheduler removes queue item [2] scheduler processes trigger event, begins new build of image #2 To resolve this, we note that the addition of multiple image artifacts is a critical section. We adjust the creation order so that the sequence is now: * Create artifact #1 with state=None * Create artifact #2 with state=None * Create upload #1 and update artifact #1 to ready * Create upload #2 and update artifact #2 to ready The critical section is now bounded by the condition of having any IBAs with state=None. We now check for that within the launcher and wait for it to clear before we decide if any images are missing. Change-Id: Iad1b68cf8c9cf6bd5f13dab0471e7bf5fd290fbf
This commit is contained in:
2
tests/fixtures/config/multi-tenant-provider/git/common-config/playbooks/base.yaml
vendored
Normal file
2
tests/fixtures/config/multi-tenant-provider/git/common-config/playbooks/base.yaml
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
- hosts: all
|
||||
tasks: []
|
||||
21
tests/fixtures/config/multi-tenant-provider/git/common-config/zuul-image.yaml
vendored
Normal file
21
tests/fixtures/config/multi-tenant-provider/git/common-config/zuul-image.yaml
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
- pipeline:
|
||||
name: image
|
||||
manager: independent
|
||||
trigger:
|
||||
zuul:
|
||||
- event: image-build
|
||||
success:
|
||||
zuul:
|
||||
image-built: true
|
||||
image-validated: true
|
||||
|
||||
- project:
|
||||
name: common-config
|
||||
image:
|
||||
jobs:
|
||||
# These jobs should run
|
||||
- build-debian-local-image
|
||||
- build-ubuntu-local-image
|
||||
# This job should not because we don't emit a trigger event
|
||||
# for this image.
|
||||
- build-fedora-local-image
|
||||
134
tests/fixtures/config/multi-tenant-provider/git/common-config/zuul.yaml
vendored
Normal file
134
tests/fixtures/config/multi-tenant-provider/git/common-config/zuul.yaml
vendored
Normal file
@@ -0,0 +1,134 @@
|
||||
- pipeline:
|
||||
name: check
|
||||
manager: independent
|
||||
trigger:
|
||||
gerrit:
|
||||
- event: patchset-created
|
||||
success:
|
||||
gerrit:
|
||||
Verified: 1
|
||||
failure:
|
||||
gerrit:
|
||||
Verified: -1
|
||||
|
||||
- pipeline:
|
||||
name: gate
|
||||
manager: dependent
|
||||
success-message: Build succeeded (gate).
|
||||
trigger:
|
||||
gerrit:
|
||||
- event: comment-added
|
||||
approval:
|
||||
- Approved: 1
|
||||
success:
|
||||
gerrit:
|
||||
Verified: 2
|
||||
submit: true
|
||||
failure:
|
||||
gerrit:
|
||||
Verified: -2
|
||||
start:
|
||||
gerrit:
|
||||
Verified: 0
|
||||
precedence: high
|
||||
|
||||
- pipeline:
|
||||
name: post
|
||||
manager: independent
|
||||
trigger:
|
||||
gerrit:
|
||||
- event: ref-updated
|
||||
ref: ^(?!refs/).*$
|
||||
|
||||
- pipeline:
|
||||
name: tag
|
||||
manager: independent
|
||||
trigger:
|
||||
gerrit:
|
||||
- event: ref-updated
|
||||
ref: ^refs/tags/.*$
|
||||
|
||||
- job:
|
||||
name: base
|
||||
parent: null
|
||||
run: playbooks/base.yaml
|
||||
nodeset:
|
||||
nodes:
|
||||
- label: ubuntu-xenial
|
||||
name: controller
|
||||
|
||||
- job:
|
||||
name: build-debian-local-image
|
||||
image-build-name: debian-local
|
||||
|
||||
- job:
|
||||
name: build-ubuntu-local-image
|
||||
image-build-name: ubuntu-local
|
||||
|
||||
- job:
|
||||
name: build-fedora-local-image
|
||||
image-build-name: fedora-local
|
||||
|
||||
- image:
|
||||
name: debian
|
||||
type: cloud
|
||||
|
||||
- image:
|
||||
name: debian-local
|
||||
type: zuul
|
||||
|
||||
- image:
|
||||
name: ubuntu-local
|
||||
type: zuul
|
||||
|
||||
- image:
|
||||
name: fedora-local
|
||||
type: zuul
|
||||
|
||||
- flavor:
|
||||
name: normal
|
||||
|
||||
- label:
|
||||
name: debian-normal
|
||||
image: debian
|
||||
flavor: normal
|
||||
|
||||
- label:
|
||||
name: debian-local-normal
|
||||
image: debian-local
|
||||
flavor: normal
|
||||
|
||||
# See zuul-image.yaml for the image build pipeline and config
|
||||
|
||||
- section:
|
||||
name: aws-base
|
||||
abstract: true
|
||||
connection: aws
|
||||
host-key-checking: false
|
||||
boot-timeout: 120
|
||||
launch-timeout: 600
|
||||
object-storage:
|
||||
bucket-name: zuul
|
||||
key-name: zuul
|
||||
flavors:
|
||||
- name: normal
|
||||
instance-type: t3.medium
|
||||
images:
|
||||
- name: debian
|
||||
image-id: ami-1e749f67
|
||||
- name: debian-local
|
||||
- name: ubuntu-local
|
||||
# Don't add fedora here, so that we don't emit a trigger event
|
||||
# for it.
|
||||
|
||||
- section:
|
||||
name: aws-us-east-1
|
||||
parent: aws-base
|
||||
region: us-east-1
|
||||
|
||||
- provider:
|
||||
name: aws-us-east-1-main
|
||||
section: aws-us-east-1
|
||||
labels:
|
||||
- name: debian-normal
|
||||
- name: debian-local-normal
|
||||
1
tests/fixtures/config/multi-tenant-provider/git/org_project1/README
vendored
Normal file
1
tests/fixtures/config/multi-tenant-provider/git/org_project1/README
vendored
Normal file
@@ -0,0 +1 @@
|
||||
test
|
||||
1
tests/fixtures/config/multi-tenant-provider/git/org_project2/README
vendored
Normal file
1
tests/fixtures/config/multi-tenant-provider/git/org_project2/README
vendored
Normal file
@@ -0,0 +1 @@
|
||||
test
|
||||
22
tests/fixtures/config/multi-tenant-provider/main.yaml
vendored
Normal file
22
tests/fixtures/config/multi-tenant-provider/main.yaml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
- tenant:
|
||||
name: tenant-one
|
||||
source:
|
||||
gerrit:
|
||||
config-projects:
|
||||
- common-config:
|
||||
include: ['job', 'nodeset', 'secret', 'pipeline', 'project', 'project-template', 'semaphore', 'queue', 'image', 'flavor', 'label', 'section', 'provider']
|
||||
extra-config-paths: zuul-image.yaml
|
||||
- tenant-one-config
|
||||
untrusted-projects:
|
||||
- org/project1
|
||||
|
||||
- tenant:
|
||||
name: tenant-two
|
||||
source:
|
||||
gerrit:
|
||||
config-projects:
|
||||
- common-config:
|
||||
include: ['job', 'nodeset', 'secret', 'pipeline', 'project', 'project-template', 'semaphore', 'queue', 'image', 'flavor', 'label', 'section', 'provider']
|
||||
- tenant-two-config
|
||||
untrusted-projects:
|
||||
- org/project2
|
||||
@@ -1493,6 +1493,7 @@ class TestWeb(BaseTestWeb):
|
||||
|
||||
class TestWebProviders(LauncherBaseTestCase, WebMixin):
|
||||
config_file = 'zuul-connections-nodepool.conf'
|
||||
tenant_config_file = 'config/multi-tenant-provider/main.yaml'
|
||||
|
||||
@simple_layout('layouts/nodepool.yaml', enable_nodepool=True)
|
||||
def test_web_providers(self):
|
||||
@@ -1590,7 +1591,6 @@ class TestWebProviders(LauncherBaseTestCase, WebMixin):
|
||||
]
|
||||
self.assertEqual(expected, data)
|
||||
|
||||
@simple_layout('layouts/nodepool-image.yaml', enable_nodepool=True)
|
||||
@return_data(
|
||||
'build-debian-local-image',
|
||||
'refs/heads/master',
|
||||
@@ -1625,6 +1625,20 @@ class TestWebProviders(LauncherBaseTestCase, WebMixin):
|
||||
f"api/tenant/tenant-one/image-build-artifact/{art['uuid']}"
|
||||
)
|
||||
self.assertEqual(401, resp.status_code, resp.text)
|
||||
# Test that the wrong tenant fails, even with auth
|
||||
authz = {'iss': 'zuul_operator',
|
||||
'aud': 'zuul.example.com',
|
||||
'sub': 'testuser',
|
||||
'zuul': {
|
||||
'admin': ['tenant-two', ]
|
||||
},
|
||||
'exp': int(time.time()) + 3600}
|
||||
token = jwt.encode(authz, key='NoDanaOnlyZuul',
|
||||
algorithm='HS256')
|
||||
resp = self.delete_url(
|
||||
f"api/tenant/tenant-two/image-build-artifact/{art['uuid']}",
|
||||
headers={'Authorization': 'Bearer %s' % token})
|
||||
self.assertEqual(404, resp.status_code, resp.text)
|
||||
# Do it again with auth
|
||||
authz = {'iss': 'zuul_operator',
|
||||
'aud': 'zuul.example.com',
|
||||
@@ -1645,7 +1659,6 @@ class TestWebProviders(LauncherBaseTestCase, WebMixin):
|
||||
if 'build_artifacts' not in data[1]:
|
||||
break
|
||||
|
||||
@simple_layout('layouts/nodepool-image.yaml', enable_nodepool=True)
|
||||
@return_data(
|
||||
'build-debian-local-image',
|
||||
'refs/heads/master',
|
||||
@@ -1680,6 +1693,20 @@ class TestWebProviders(LauncherBaseTestCase, WebMixin):
|
||||
f"api/tenant/tenant-one/image-upload/{upload['uuid']}"
|
||||
)
|
||||
self.assertEqual(401, resp.status_code, resp.text)
|
||||
# Test that the wrong tenant fails, even with auth
|
||||
authz = {'iss': 'zuul_operator',
|
||||
'aud': 'zuul.example.com',
|
||||
'sub': 'testuser',
|
||||
'zuul': {
|
||||
'admin': ['tenant-two', ]
|
||||
},
|
||||
'exp': int(time.time()) + 3600}
|
||||
token = jwt.encode(authz, key='NoDanaOnlyZuul',
|
||||
algorithm='HS256')
|
||||
resp = self.delete_url(
|
||||
f"api/tenant/tenant-two/image-upload/{upload['uuid']}",
|
||||
headers={'Authorization': 'Bearer %s' % token})
|
||||
self.assertEqual(404, resp.status_code, resp.text)
|
||||
# Do it again with auth
|
||||
authz = {'iss': 'zuul_operator',
|
||||
'aud': 'zuul.example.com',
|
||||
@@ -1700,7 +1727,6 @@ class TestWebProviders(LauncherBaseTestCase, WebMixin):
|
||||
if 'build_artifacts' not in data[1]:
|
||||
break
|
||||
|
||||
@simple_layout('layouts/nodepool-image.yaml', enable_nodepool=True)
|
||||
@return_data(
|
||||
'build-debian-local-image',
|
||||
'refs/heads/master',
|
||||
@@ -1755,8 +1781,27 @@ class TestWebProviders(LauncherBaseTestCase, WebMixin):
|
||||
dict(name='build-ubuntu-local-image', result='SUCCESS'),
|
||||
dict(name='build-ubuntu-local-image', result='SUCCESS'),
|
||||
], ordered=False)
|
||||
# Try again with the wrong tenant
|
||||
authz = {'iss': 'zuul_operator',
|
||||
'aud': 'zuul.example.com',
|
||||
'sub': 'testuser',
|
||||
'zuul': {
|
||||
'admin': ['tenant-two', ]
|
||||
},
|
||||
'exp': int(time.time()) + 3600}
|
||||
token = jwt.encode(authz, key='NoDanaOnlyZuul',
|
||||
algorithm='HS256')
|
||||
resp = self.post_url(
|
||||
"api/tenant/tenant-two/image/ubuntu-local/build",
|
||||
headers={'Authorization': 'Bearer %s' % token})
|
||||
self.assertEqual(200, resp.status_code, resp.text)
|
||||
self.waitUntilSettled("image rebuild")
|
||||
self.assertHistory([
|
||||
dict(name='build-debian-local-image', result='SUCCESS'),
|
||||
dict(name='build-ubuntu-local-image', result='SUCCESS'),
|
||||
dict(name='build-ubuntu-local-image', result='SUCCESS'),
|
||||
], ordered=False)
|
||||
|
||||
@simple_layout('layouts/nodepool-image.yaml', enable_nodepool=True)
|
||||
@return_data(
|
||||
'build-debian-local-image',
|
||||
'refs/heads/master',
|
||||
|
||||
@@ -42,6 +42,7 @@ class ZuulReporter(BaseReporter):
|
||||
continue
|
||||
image_name = build.job.image_build_name
|
||||
image = item.manager.tenant.layout.images[image_name]
|
||||
ibas = []
|
||||
for artifact in get_artifacts_from_result_data(
|
||||
build.result_data,
|
||||
logger=self.log):
|
||||
@@ -51,7 +52,9 @@ class ZuulReporter(BaseReporter):
|
||||
item.manager.tenant.name, image, build,
|
||||
metadata, artifact['url'],
|
||||
self.image_validated)
|
||||
sched.createImageUploads(iba)
|
||||
ibas.append(iba)
|
||||
for iba in ibas:
|
||||
sched.createImageUploads(iba)
|
||||
|
||||
def reportImageValidated(self, item):
|
||||
sched = self.driver.sched
|
||||
|
||||
@@ -1969,8 +1969,32 @@ class Launcher:
|
||||
tenant_name, iba.name)
|
||||
self.trigger_events[tenant_name].put(event.trigger_name, event)
|
||||
|
||||
def _waitForStableImageRegistry(self):
|
||||
# Wait for any non-ready IBAs to become ready. This resolves
|
||||
# a race condition where the scheduler might have created the
|
||||
# first of more than one IBA in a buildset, and we were
|
||||
# immediately triggered. Wait until the scheduler is finished
|
||||
# so that if a buildset creates more than one IBA we have all
|
||||
# the info.
|
||||
now = time.time()
|
||||
while True:
|
||||
done = True
|
||||
for iba in self.image_build_registry.getAllArtifacts():
|
||||
if iba.state is not None:
|
||||
continue
|
||||
if now - iba.timestamp > 30:
|
||||
# We'll only wait 30 seconds for this so we don't
|
||||
# get stuck.
|
||||
continue
|
||||
done = False
|
||||
break
|
||||
if done:
|
||||
return
|
||||
time.sleep(1)
|
||||
|
||||
def checkMissingImages(self):
|
||||
self.log.debug("Checking for missing images")
|
||||
self._waitForStableImageRegistry()
|
||||
for tenant_name, providers in self.tenant_providers.items():
|
||||
images_by_project_branch = {}
|
||||
for provider in providers:
|
||||
|
||||
@@ -53,6 +53,18 @@ class ImageBuildRegistry(LockableZKObjectCache):
|
||||
arts = sorted(arts, key=lambda x: x.timestamp)
|
||||
return arts
|
||||
|
||||
def getAllArtifacts(self):
|
||||
keys = []
|
||||
for image_canonical_name in self.builds_by_image_name.keys():
|
||||
keys.extend(self.builds_by_image_name[image_canonical_name])
|
||||
arts = [self._cached_objects.get(key) for key in keys]
|
||||
arts = [a for a in arts if a is not None]
|
||||
# Sort in a stable order, primarily by timestamp, then format
|
||||
# for identical timestamps.
|
||||
arts = sorted(arts, key=lambda x: x.format)
|
||||
arts = sorted(arts, key=lambda x: x.timestamp)
|
||||
return arts
|
||||
|
||||
|
||||
class ImageUploadRegistry(LockableZKObjectCache):
|
||||
|
||||
|
||||
Reference in New Issue
Block a user