Fix multi-provider cleanup
The launcher's leaked-resource cleanup method overwrote the list of
potentially leaked resources after each endpoint, so only the last
endpoint to run actually had its resources cleaned up. Correct this by
keeping the lists of potential leaks indexed by endpoint.

Change-Id: I961cb53f53ccf5fc39e467e1ddde3034a46f1952
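For illustration only, a minimal sketch of the pattern being fixed: the
candidate dict is keyed by endpoint canonical name and compared across two
passes, as in the CleanupWorker diff below. `Endpoint` and `CleanupSketch`
here are hypothetical stand-ins, not the real launcher classes.

    from dataclasses import dataclass


    @dataclass(frozen=True)
    class Endpoint:
        # Hypothetical stand-in for a launcher endpoint.
        canonical_name: str


    class CleanupSketch:
        def __init__(self):
            # Indexed by endpoint canonical name so one endpoint's pass
            # cannot clobber another endpoint's candidates (the bug fixed
            # by this change).
            self.possibly_leaked_nodes = {}

        def cleanup_leaked_resources(self, endpoint, leaked_ids):
            newly_leaked = {}
            # Candidates recorded for *this* endpoint on the previous pass.
            previous = self.possibly_leaked_nodes.get(
                endpoint.canonical_name, {})
            for node_id in leaked_ids:
                newly_leaked[node_id] = True
                if node_id in previous:
                    # Leaked on two consecutive passes for this endpoint:
                    # not a startup race, so it is safe to delete.
                    print(f"delete leaked node {node_id} on "
                          f"{endpoint.canonical_name}")
            # Store per endpoint rather than overwriting shared state.
            self.possibly_leaked_nodes[endpoint.canonical_name] = newly_leaked


    # Two endpoints no longer erase each other's first-pass candidates:
    worker = CleanupSketch()
    a, b = Endpoint("cloud-a"), Endpoint("cloud-b")
    worker.cleanup_leaked_resources(a, ["n1"])
    worker.cleanup_leaked_resources(b, ["n2"])
    worker.cleanup_leaked_resources(a, ["n1"])  # deletes n1
    worker.cleanup_leaked_resources(b, ["n2"])  # deletes n2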
parent cd0c1be98f
commit aafe84d1b1
@@ -84,9 +84,11 @@ class FakeOpenstackCloud:
     log = logging.getLogger("zuul.FakeOpenstackCloud")
 
     def __init__(self,
+                 region,
                  needs_floating_ip=False,
                  auto_attach_floating_ip=True,
                  ):
+        self._fake_region = region
         self._fake_needs_floating_ip = needs_floating_ip
         self._fake_auto_attach_floating_ip = auto_attach_floating_ip
         self.servers = []
@@ -153,7 +155,7 @@ class FakeOpenstackConnection:
         self.config = FakeOpenstackConfig()
         self.config.config = {}
         self.config.config['image_format'] = 'qcow2'
-        self.config.config['region_name'] = 'region1'
+        self.config.config['region_name'] = cloud._fake_region or 'region1'
 
     def _needs_floating_ip(self, server, nat_destination):
         return self.cloud._fake_needs_floating_ip
@@ -302,7 +304,7 @@ class FakeOpenstackConnection:
 
 class FakeOpenstackProviderEndpoint(OpenstackProviderEndpoint):
     def _getClient(self):
-        return self._fake_cloud._getConnection()
+        return self._fake_cloud[self.region]._getConnection()
 
     def _expandServer(self, server):
         return server
tests/fixtures/layouts/openstack/nodepool-multi.yaml (vendored, new file, 139 lines)
@@ -0,0 +1,139 @@
+- pipeline:
+    name: check
+    manager: independent
+    trigger:
+      gerrit:
+        - event: patchset-created
+    success:
+      gerrit:
+        Verified: 1
+    failure:
+      gerrit:
+        Verified: -1
+
+- pipeline:
+    name: gate
+    manager: dependent
+    success-message: Build succeeded (gate).
+    trigger:
+      gerrit:
+        - event: comment-added
+          approval:
+            - Approved: 1
+    success:
+      gerrit:
+        Verified: 2
+        submit: true
+    failure:
+      gerrit:
+        Verified: -2
+    start:
+      gerrit:
+        Verified: 0
+    precedence: high
+
+- pipeline:
+    name: post
+    manager: independent
+    trigger:
+      gerrit:
+        - event: ref-updated
+          ref: ^(?!refs/).*$
+
+- pipeline:
+    name: tag
+    manager: independent
+    trigger:
+      gerrit:
+        - event: ref-updated
+          ref: ^refs/tags/.*$
+
+- job:
+    name: base
+    parent: null
+    run: playbooks/base.yaml
+    nodeset:
+      nodes:
+        - label: debian-normal
+          name: controller
+
+- job:
+    name: check-job
+    run: playbooks/check.yaml
+
+- job:
+    name: post-job
+    run: playbooks/post.yaml
+
+- job:
+    name: tag-job
+    run: playbooks/tag.yaml
+
+- project:
+    name: org/project
+    check:
+      jobs:
+        - check-job
+    gate:
+      jobs:
+        - check-job
+    post:
+      jobs:
+        - post-job
+    tag:
+      jobs:
+        - tag-job
+
+- image:
+    name: debian
+    type: cloud
+
+- flavor:
+    name: normal
+
+- label:
+    name: debian-normal
+    image: debian
+    flavor: normal
+
+- section:
+    name: openstack-base
+    abstract: true
+    connection: openstack
+    host-key-checking: false
+    boot-timeout: 120
+    launch-timeout: 600
+    launch-attempts: 2
+    key-name: zuul
+    floating-ip-cleanup: true
+    port-cleanup-interval: 1
+    flavors:
+      - name: normal
+        flavor-name: Fake Flavor
+    images:
+      - name: debian
+        image-id: fake-image
+    networks:
+      - fake-network
+
+- section:
+    name: openstack
+    parent: openstack-base
+
+- provider:
+    name: openstack-main
+    section: openstack
+    labels:
+      - name: debian-normal
+
+- section:
+    name: openstack-extra
+    parent: openstack-base
+    region: region2
+
+- provider:
+    name: openstack-extra-main
+    section: openstack-extra
+    labels: []
+    images: []
+    flavors: []
@@ -291,7 +291,8 @@ class TestAwsDriver(BaseCloudDriverTest):
     def test_aws_diskimage_ebs_direct(self):
         self._test_diskimage()
 
-    @simple_layout('layouts/nodepool.yaml', enable_nodepool=True)
+    @simple_layout('layouts/nodepool-multi-provider.yaml',
+                   enable_nodepool=True)
     def test_aws_resource_cleanup(self):
         self.waitUntilSettled()
         self.launcher.cleanup_worker.INTERVAL = 1
@@ -63,27 +63,34 @@ class BaseOpenstackDriverTest(ZuulTestCase):
     openstack_needs_floating_ip = False
     openstack_auto_attach_floating_ip = True
 
+    def _makeFakeCloud(self, region):
+        fake_cloud = FakeOpenstackCloud(
+            region,
+            needs_floating_ip=self.openstack_needs_floating_ip,
+            auto_attach_floating_ip=self.openstack_auto_attach_floating_ip,
+        )
+        fake_cloud.max_instances =\
+            self.test_config.driver.openstack.get('max_instances', 100)
+        fake_cloud.max_cores =\
+            self.test_config.driver.openstack.get('max_cores', 100)
+        fake_cloud.max_ram =\
+            self.test_config.driver.openstack.get('max_ram', 1000000)
+        fake_cloud.max_volumes =\
+            self.test_config.driver.openstack.get('max_volumes', 100)
+        fake_cloud.max_volume_gb =\
+            self.test_config.driver.openstack.get('max_volume_gb', 100)
+        self.fake_cloud[region] = fake_cloud
+
     def setUp(self):
         self.initTestConfig()
         self.useFixture(ImageMocksFixture())
         clouds_yaml = os.path.join(FIXTURE_DIR, 'clouds.yaml')
         self.useFixture(
             fixtures.EnvironmentVariable('OS_CLIENT_CONFIG_FILE', clouds_yaml))
-        self.fake_cloud = FakeOpenstackCloud(
-            needs_floating_ip=self.openstack_needs_floating_ip,
-            auto_attach_floating_ip=self.openstack_auto_attach_floating_ip,
-        )
-        self.fake_cloud.max_instances =\
-            self.test_config.driver.openstack.get('max_instances', 100)
-        self.fake_cloud.max_cores =\
-            self.test_config.driver.openstack.get('max_cores', 100)
-        self.fake_cloud.max_ram =\
-            self.test_config.driver.openstack.get('max_ram', 1000000)
-        self.fake_cloud.max_volumes =\
-            self.test_config.driver.openstack.get('max_volumes', 100)
-        self.fake_cloud.max_volume_gb =\
-            self.test_config.driver.openstack.get('max_volume_gb', 100)
 
+        self.fake_cloud = {}
+        self._makeFakeCloud(None)
+        self._makeFakeCloud('region2')
         self.patch(OpenstackDriver, '_endpoint_class',
                    FakeOpenstackProviderEndpoint)
         self.patch(FakeOpenstackProviderEndpoint,
@@ -136,11 +143,12 @@ class TestOpenstackDriver(BaseOpenstackDriverTest, BaseCloudDriverTest):
 
     # Openstack-driver specific tests
 
-    @simple_layout('layouts/openstack/nodepool.yaml', enable_nodepool=True)
+    @simple_layout('layouts/openstack/nodepool-multi.yaml',
+                   enable_nodepool=True)
     def test_openstack_resource_cleanup(self):
         self.waitUntilSettled()
         self.launcher.cleanup_worker.INTERVAL = 1
-        conn = self.fake_cloud._getConnection()
+        conn = self.fake_cloud[None]._getConnection()
         system_id = self.launcher.system.system_id
         tags = {
             'zuul_system_id': system_id,
@@ -681,6 +681,7 @@ class CleanupWorker:
     def __init__(self, launcher):
         self.launcher = launcher
         self.wake_event = threading.Event()
+        # These are indexed by endpoint cname
        self.possibly_leaked_nodes = {}
        self.possibly_leaked_uploads = {}
         self._running = False
@@ -748,6 +749,10 @@ class CleanupWorker:
     def cleanupLeakedResources(self, endpoint):
         newly_leaked_nodes = {}
         newly_leaked_uploads = {}
+        possibly_leaked_nodes = self.possibly_leaked_nodes.get(
+            endpoint.canonical_name, {})
+        possibly_leaked_uploads = self.possibly_leaked_uploads.get(
+            endpoint.canonical_name, {})
 
         # Get a list of all providers that share this endpoint. This
         # is because some providers may store resources like image
@@ -765,7 +770,7 @@ class CleanupWorker:
             upload_id = resource.metadata.get('zuul_upload_uuid')
             if node_id and self.launcher.api.getProviderNode(node_id) is None:
                 newly_leaked_nodes[node_id] = resource
-                if node_id in self.possibly_leaked_nodes:
+                if node_id in possibly_leaked_nodes:
                     # We've seen this twice now, so it's not a race
                     # condition.
                     try:
@@ -782,7 +787,7 @@ class CleanupWorker:
                     self.launcher.image_upload_registry.getItem(
                         upload_id) is None):
                 newly_leaked_uploads[upload_id] = resource
-                if upload_id in self.possibly_leaked_uploads:
+                if upload_id in possibly_leaked_uploads:
                     # We've seen this twice now, so it's not a race
                     # condition.
                     try:
@@ -796,8 +801,10 @@ class CleanupWorker:
                     self.log.exception(
                         "Unable to delete leaked "
                         f"resource for upload {upload_id}")
-        self.possibly_leaked_nodes = newly_leaked_nodes
-        self.possibly_leaked_uploads = newly_leaked_uploads
+        self.possibly_leaked_nodes[endpoint.canonical_name] =\
+            newly_leaked_nodes
+        self.possibly_leaked_uploads[endpoint.canonical_name] =\
+            newly_leaked_uploads
 
 
 class Launcher:
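For context, a minimal sketch of the test-side pattern in the setUp changes
above: fake clouds are kept in a dict keyed by region (None for the default
region), and each endpoint resolves its own fake. `FakeCloud` here is a
hypothetical stand-in, not the real FakeOpenstackCloud fixture.

    class FakeCloud:
        # Hypothetical stand-in for the FakeOpenstackCloud fixture.
        def __init__(self, region):
            self.region = region

        def get_connection(self):
            return f"connection:{self.region or 'default'}"


    # Mirror of self.fake_cloud[region] in the test setUp: one fake per
    # region, keyed by region name (None selects the default region).
    fake_clouds = {region: FakeCloud(region) for region in (None, "region2")}

    # An endpoint configured for a region looks up its own fake, so a
    # multi-provider test exercises each endpoint's cleanup independently.
    assert fake_clouds[None].get_connection() == "connection:default"
    assert fake_clouds["region2"].get_connection() == "connection:region2"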