Browse Source

OpenStack: store ZK records for launch error nodes

If we get an error on create server, we currently leak the instance
because we don't store the external id of the instance in ZK.  The
instance should eventually be deleted anyway, since it's a leaked
instance, but we try to keep track of as much as possible.
OpenStackSDK can often return the external id to us in these cases,
so handle that case and store the external id on a ZK record so that
the instance is correctly accounted for.

Change-Id: I7ec448e9a7cf6cd01903bf7b5bf4b07a1c143fb8
James E. Blair 4 months ago
parent
commit
afbf9108d8
1 changed file with 24 additions and 16 deletions
  1. 24
    16
      nodepool/driver/openstack/handler.py

+ 24
- 16
nodepool/driver/openstack/handler.py View File

@@ -18,6 +18,7 @@ import pprint
18 18
 import random
19 19
 
20 20
 from kazoo import exceptions as kze
21
+import openstack
21 22
 
22 23
 from nodepool import exceptions
23 24
 from nodepool import nodeutils as utils
@@ -122,22 +123,29 @@ class OpenStackNodeLauncher(NodeLauncher):
122 123
         # because that isn't available in ZooKeeper until after the server is
123 124
         # active, which could cause a race in leak detection.
124 125
 
125
-        server = self.handler.manager.createServer(
126
-            hostname,
127
-            image=image_external,
128
-            min_ram=self.label.min_ram,
129
-            flavor_name=self.label.flavor_name,
130
-            key_name=self.label.key_name,
131
-            az=self.node.az,
132
-            config_drive=config_drive,
133
-            nodepool_node_id=self.node.id,
134
-            nodepool_node_label=self.node.type[0],
135
-            nodepool_image_name=image_name,
136
-            networks=self.pool.networks,
137
-            security_groups=self.pool.security_groups,
138
-            boot_from_volume=self.label.boot_from_volume,
139
-            volume_size=self.label.volume_size,
140
-            instance_properties=self.label.instance_properties)
126
+        try:
127
+            server = self.handler.manager.createServer(
128
+                hostname,
129
+                image=image_external,
130
+                min_ram=self.label.min_ram,
131
+                flavor_name=self.label.flavor_name,
132
+                key_name=self.label.key_name,
133
+                az=self.node.az,
134
+                config_drive=config_drive,
135
+                nodepool_node_id=self.node.id,
136
+                nodepool_node_label=self.node.type[0],
137
+                nodepool_image_name=image_name,
138
+                networks=self.pool.networks,
139
+                security_groups=self.pool.security_groups,
140
+                boot_from_volume=self.label.boot_from_volume,
141
+                volume_size=self.label.volume_size,
142
+                instance_properties=self.label.instance_properties)
143
+        except openstack.cloud.exc.OpenStackCloudCreateException as e:
144
+            if e.resource_id:
145
+                self.node.external_id = e.resource_id
146
+                # The outer exception handler will handle storing the
147
+                # node immediately after this.
148
+            raise
141 149
 
142 150
         self.node.external_id = server.id
143 151
         self.node.hostname = hostname

Loading…
Cancel
Save