From afbf9108d893ede0d147da2afe16c9e6d4bc76d4 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Thu, 29 Nov 2018 15:35:24 -0800 Subject: [PATCH] OpenStack: store ZK records for launch error nodes If we get an error while creating a server, we currently leak the instance because we don't store the external id of the instance in ZK. It should eventually be deleted since it's a leaked instance, but we try to keep track of as much as possible. OpenStackSDK can often return the external id to us in these cases, so handle that case and store the external id in the node's ZK record so that the instance is correctly accounted for. Change-Id: I7ec448e9a7cf6cd01903bf7b5bf4b07a1c143fb8 --- nodepool/driver/openstack/handler.py | 40 +++++++++++++++++----------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/nodepool/driver/openstack/handler.py b/nodepool/driver/openstack/handler.py index 47123923c..c52d45849 100644 --- a/nodepool/driver/openstack/handler.py +++ b/nodepool/driver/openstack/handler.py @@ -18,6 +18,7 @@ import pprint import random from kazoo import exceptions as kze +import openstack from nodepool import exceptions from nodepool import nodeutils as utils @@ -122,22 +123,29 @@ class OpenStackNodeLauncher(NodeLauncher): # because that isn't available in ZooKeeper until after the server is # active, which could cause a race in leak detection. 
- server = self.handler.manager.createServer( - hostname, - image=image_external, - min_ram=self.label.min_ram, - flavor_name=self.label.flavor_name, - key_name=self.label.key_name, - az=self.node.az, - config_drive=config_drive, - nodepool_node_id=self.node.id, - nodepool_node_label=self.node.type[0], - nodepool_image_name=image_name, - networks=self.pool.networks, - security_groups=self.pool.security_groups, - boot_from_volume=self.label.boot_from_volume, - volume_size=self.label.volume_size, - instance_properties=self.label.instance_properties) + try: + server = self.handler.manager.createServer( + hostname, + image=image_external, + min_ram=self.label.min_ram, + flavor_name=self.label.flavor_name, + key_name=self.label.key_name, + az=self.node.az, + config_drive=config_drive, + nodepool_node_id=self.node.id, + nodepool_node_label=self.node.type[0], + nodepool_image_name=image_name, + networks=self.pool.networks, + security_groups=self.pool.security_groups, + boot_from_volume=self.label.boot_from_volume, + volume_size=self.label.volume_size, + instance_properties=self.label.instance_properties) + except openstack.cloud.exc.OpenStackCloudCreateException as e: + if e.resource_id: + self.node.external_id = e.resource_id + # The outer exception handler will handle storing the + # node immediately after this. + raise self.node.external_id = server.id self.node.hostname = hostname