From 687f683199fdb2239296d90a35c73af0783838cb Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Thu, 24 Oct 2024 15:15:56 -0700 Subject: [PATCH] Launcher: fix deleting servers The openstack and aws drivers both had some minor errors preventing instance deletion from working. Fix those and add test assertions. Change-Id: I4724ee3e17820dbe0a4029409f49190924b17f83 --- tests/fake_openstack.py | 1 + tests/unit/test_aws_driver.py | 3 +++ tests/unit/test_cloud_driver.py | 21 +++++++++++++++++ tests/unit/test_openstack_driver.py | 4 ++++ zuul/driver/aws/awsendpoint.py | 6 +++-- zuul/driver/openstack/openstackendpoint.py | 26 +++++++++++++--------- 6 files changed, 48 insertions(+), 13 deletions(-) diff --git a/tests/fake_openstack.py b/tests/fake_openstack.py index 72a9c42342..f8f0ab3d97 100644 --- a/tests/fake_openstack.py +++ b/tests/fake_openstack.py @@ -196,6 +196,7 @@ class FakeOpenstackConnection: status='ACTIVE', addresses=addresses, interface_ip=interface_ip, + flavor=flavor, ) server = FakeOpenstackServer(**args) self.cloud.servers.append(server) diff --git a/tests/unit/test_aws_driver.py b/tests/unit/test_aws_driver.py index db76152ec7..d146d022f5 100644 --- a/tests/unit/test_aws_driver.py +++ b/tests/unit/test_aws_driver.py @@ -23,6 +23,7 @@ import boto3 from zuul.driver.aws import AwsDriver from zuul.driver.aws.awsmodel import AwsProviderNode +import zuul.driver.aws.awsendpoint from tests.fake_aws import FakeAws, FakeAwsProviderEndpoint from tests.base import ( @@ -37,6 +38,7 @@ from tests.unit.test_cloud_driver import BaseCloudDriverTest class TestAwsDriver(BaseCloudDriverTest): config_file = 'zuul-connections-nodepool.conf' cloud_test_image_format = 'raw' + cloud_test_provider_name = 'aws-us-east-1-main' mock_aws = mock_aws() debian_return_data = { 'zuul': { @@ -65,6 +67,7 @@ class TestAwsDriver(BaseCloudDriverTest): fixtures.EnvironmentVariable('AWS_ACCESS_KEY_ID', aws_id)) self.useFixture( fixtures.EnvironmentVariable('AWS_SECRET_ACCESS_KEY', aws_key)) + self.patch(zuul.driver.aws.awsendpoint, 'CACHE_TTL', 1) # Moto doesn't handle some aspects of instance creation, so we # intercept and log the calls. diff --git a/tests/unit/test_cloud_driver.py b/tests/unit/test_cloud_driver.py index 09de688e74..d2a869b17a 100644 --- a/tests/unit/test_cloud_driver.py +++ b/tests/unit/test_cloud_driver.py @@ -27,6 +27,14 @@ from tests.base import ( class BaseCloudDriverTest(ZuulTestCase): cloud_test_connection_type = 'ssh' cloud_test_image_format = '' + cloud_test_provider_name = '' + + def _getEndpoint(self): + # Use the launcher provider so that we're using the same ttl + # method caches. + for provider in self.launcher.tenant_providers['tenant-one']: + if provider.name == self.cloud_test_provider_name: + return provider.getEndpoint() def _assertProviderNodeAttributes(self, pnode): self.assertEqual(pnode.connection_type, @@ -35,6 +43,12 @@ class BaseCloudDriverTest(ZuulTestCase): def _test_node_lifecycle(self, label): # Call this in a test to run a node lifecycle + for _ in iterate_timeout( + 30, "scheduler and launcher to have the same layout"): + if (self.scheds.first.sched.local_layout_state.get("tenant-one") == + self.launcher.local_layout_state.get("tenant-one")): + break + endpoint = self._getEndpoint() nodeset = model.NodeSet() nodeset.addNode(model.Node("node", label)) @@ -60,6 +74,7 @@ class BaseCloudDriverTest(ZuulTestCase): self.assertTrue(pnode.hasLock()) self._assertProviderNodeAttributes(pnode) + self.assertGreater(len(list(endpoint.listInstances())), 0) client.useNodeset(nodeset) self.waitUntilSettled() @@ -82,6 +97,12 @@ class BaseCloudDriverTest(ZuulTestCase): except NoNodeError: break + # Iterate here because the aws driver (at least) performs + # delayed async deletes. + for _ in iterate_timeout(60, "instances to be deleted"): + if len(list(endpoint.listInstances())) == 0: + break + def _test_diskimage(self): self.waitUntilSettled() self.assertHistory([ diff --git a/tests/unit/test_openstack_driver.py b/tests/unit/test_openstack_driver.py index 0484743094..71a83de3bd 100644 --- a/tests/unit/test_openstack_driver.py +++ b/tests/unit/test_openstack_driver.py @@ -17,6 +17,7 @@ import os import fixtures from zuul.driver.openstack import OpenstackDriver +import zuul.driver.openstack.openstackendpoint from tests.fake_openstack import ( FakeOpenstackCloud, @@ -34,6 +35,7 @@ from tests.unit.test_cloud_driver import BaseCloudDriverTest class BaseOpenstackDriverTest(ZuulTestCase): cloud_test_image_format = 'qcow2' + cloud_test_provider_name = 'openstack-main' config_file = 'zuul-connections-nodepool.conf' debian_return_data = { 'zuul': { @@ -70,6 +72,8 @@ class BaseOpenstackDriverTest(ZuulTestCase): FakeOpenstackProviderEndpoint) self.patch(FakeOpenstackProviderEndpoint, '_fake_cloud', self.fake_cloud) + self.patch(zuul.driver.openstack.openstackendpoint, + 'CACHE_TTL', 1) super().setUp() def tearDown(self): diff --git a/zuul/driver/aws/awsendpoint.py b/zuul/driver/aws/awsendpoint.py index 7cf370334b..72e76e4e0a 100644 --- a/zuul/driver/aws/awsendpoint.py +++ b/zuul/driver/aws/awsendpoint.py @@ -70,6 +70,7 @@ class AwsDeleteStateMachine(statemachine.StateMachine): def __init__(self, endpoint, node, log): self.log = log self.endpoint = endpoint + self.node = node super().__init__(node.delete_state) # Restore local objects @@ -93,10 +94,11 @@ class AwsDeleteStateMachine(statemachine.StateMachine): if self.state == self.INSTANCE_DELETING_START: self.instance = self.endpoint._deleteInstance( - self.instance, self.log) + self.node.aws_instance_id, self.log) self.state = self.INSTANCE_DELETING if self.state == self.INSTANCE_DELETING: + self.instance = self.endpoint._refreshDelete(self.instance) if self.instance is None: if self.host: self.state = self.HOST_RELEASING_START @@ -105,7 +107,7 @@ class AwsDeleteStateMachine(statemachine.StateMachine): if self.state == self.HOST_RELEASING_START: self.host = self.endpoint._releaseHost( - self.host, self.log) + self.node.aws_dedicated_host_id, self.log) self.state = self.HOST_RELEASING if self.state == self.HOST_RELEASING: diff --git a/zuul/driver/openstack/openstackendpoint.py b/zuul/driver/openstack/openstackendpoint.py index 26e22a8678..493a249374 100644 --- a/zuul/driver/openstack/openstackendpoint.py +++ b/zuul/driver/openstack/openstackendpoint.py @@ -134,14 +134,15 @@ class OpenstackDeleteStateMachine(statemachine.StateMachine): def __init__(self, endpoint, node, log): self.log = log self.endpoint = endpoint + self.node = node super().__init__(node.delete_state) - self.external_id = node.openstack_server_id self.floating_ips = None def advance(self): if self.state == self.START: - if self.external_id: - self.server = self.endpoint._getServer(self.external_id) + if self.node.openstack_server_id: + self.server = self.endpoint._getServer( + self.node.openstack_server_id) if (self.server and self.endpoint._hasFloatingIps() and self.server.get('addresses')): @@ -172,7 +173,7 @@ class OpenstackDeleteStateMachine(statemachine.StateMachine): if self.state == self.SERVER_DELETE_SUBMIT: self.delete_future = self.endpoint._submitApi( self.endpoint._deleteServer, - self.external_id) + self.node.openstack_server_id) self.state = self.SERVER_DELETE if self.state == self.SERVER_DELETE: @@ -201,6 +202,7 @@ class OpenstackCreateStateMachine(statemachine.StateMachine): image_external_id, tags, log): self.log = log self.endpoint = endpoint + self.node = node self.label = label self.flavor = flavor self.image = image @@ -260,14 +262,15 @@ class OpenstackCreateStateMachine(statemachine.StateMachine): # min_ram=self.label.min_ram, ) self.quota = quota_from_flavor(self.os_flavor, label=self.label) - self.external_id = None + self.node.openstack_server_id = None def _handleServerFault(self): # Return True if this is a quota fault - if not self.external_id: + if not self.node.openstack_server_id: return try: - server = self.endpoint._getServerByIdNow(self.external_id) + server = self.endpoint._getServerByIdNow( + self.node.openstack_server_id) if not server: return fault = server.get('fault', {}).get('message') @@ -281,7 +284,7 @@ class OpenstackCreateStateMachine(statemachine.StateMachine): def advance(self): if self.state == self.START: - self.external_id = None + self.node.openstack_server_id = None self.create_future = self.endpoint._submitApi( self.endpoint._createServer, self.hostname, @@ -306,11 +309,11 @@ class OpenstackCreateStateMachine(statemachine.StateMachine): self.create_future) if self.server is None: return - self.external_id = self.server['id'] + self.node.openstack_server_id = self.server['id'] self.state = self.SERVER_CREATING except openstack.cloud.exc.OpenStackCloudCreateException as e: if e.resource_id: - self.external_id = e.resource_id + self.node.openstack_server_id = e.resource_id if self._handleServerFault(): self.log.exception("Launch attempt failed:") raise exceptions.QuotaException("Quota exceeded") @@ -472,7 +475,8 @@ class OpenstackProviderEndpoint(BaseProviderEndpoint): if volume: server_volumes.append(volume) quota = quota_from_flavor(flavor, volumes=server_volumes) - yield OpenstackInstance(self.provider, server, quota) + yield OpenstackInstance(self.connection.cloud_name, + self.getRegionName(), server, quota) def getQuotaLimits(self): with Timer(self.log, 'API call get_compute_limits'):