From 1b1eab77b0461fc45dbfecb071c6b5b0900d5ed7 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Thu, 16 Jun 2022 11:27:33 -0700 Subject: [PATCH] Fix unknown label in metastatic driver If a label is removed from the configuration while a node still exists, the periodic cleanup performed by the metastatic driver will raise an AttributeError exception when trying to access the grace_time attribute, since the label pointer is None. To address this, treat a missing label as if it has a grace time of zero seconds. This adds a test which simulates the issue. This also adds an extra log entry for when a metastatic backing node slot is deallocated is added so that we log both the allocation and deallocation. Change-Id: I0c104a2fe9874e2cd30e2bf2f2227569a73be243 --- nodepool/driver/metastatic/adapter.py | 14 ++++++- nodepool/tests/unit/test_driver_metastatic.py | 40 ++++++++++++++++++- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/nodepool/driver/metastatic/adapter.py b/nodepool/driver/metastatic/adapter.py index a08654fb7..124b6acef 100644 --- a/nodepool/driver/metastatic/adapter.py +++ b/nodepool/driver/metastatic/adapter.py @@ -245,6 +245,7 @@ class BackingNodeRecord: idx = self.allocated_nodes.index(node_id) self.allocated_nodes[idx] = None self.last_used = time.time() + return idx def backsNode(self, node_id): return node_id in self.allocated_nodes @@ -287,8 +288,14 @@ class MetastaticAdapter(statemachine.Adapter): self.backing_node_records.items(): for bnr in backing_node_records[:]: label_config = self.provider._getLabel(bnr.label_name) + if label_config: + grace_time = label_config.grace_time + else: + # The label doesn't exist in our config any more, + # it must have been removed. + grace_time = 0 if (bnr.isEmpty() and - now - bnr.last_used > label_config.grace_time): + now - bnr.last_used > grace_time): self.log.info("Backing node %s has been idle for " "%s seconds, releasing", bnr.node_id, now - bnr.last_used) @@ -396,7 +403,10 @@ class MetastaticAdapter(statemachine.Adapter): self.backing_node_records.items(): for bn in backing_node_records: if bn.backsNode(node_id): - bn.deallocateSlot(node_id) + slot = bn.deallocateSlot(node_id) + self.log.info( + "Unassigned node %s from backing node %s slot %s", + node_id, bn.node_id, slot) return def _checkBackingNodeRequests(self): diff --git a/nodepool/tests/unit/test_driver_metastatic.py b/nodepool/tests/unit/test_driver_metastatic.py index 5f7dec398..c57627873 100644 --- a/nodepool/tests/unit/test_driver_metastatic.py +++ b/nodepool/tests/unit/test_driver_metastatic.py @@ -13,8 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os +import json import logging +import os import testtools @@ -180,3 +181,40 @@ class TestDriverMetastatic(tests.DBTestCase): self.assertEqual(nodes, [node1, bn1, node2, node3, bn2]) self.assertEqual(bn2.id, node3.driver_data['backing_node']) self.assertNotEqual(bn1.id, bn2.id) + + def test_metastatic_config_change(self): + configfile = self.setup_config('metastatic.yaml') + pool = self.useNodepool(configfile, watermark_sleep=1) + pool.start() + self.wait_for_config(pool) + manager = pool.getProviderManager('fake-provider') + manager._client.create_image(name="fake-image") + + # Request a node, verify that there is a backing node, and it + # has the same connection info + node1 = self._requestNode() + nodes = self._getNodes() + bn1 = nodes[1] + self.assertEqual(nodes, [node1, bn1]) + + # Update the node to indicate it was for a non-existent label + user_data = json.loads(bn1.user_data) + user_data['label'] = 'old-label' + bn1.user_data = json.dumps(user_data) + self.zk.storeNode(bn1) + + # Restart the provider and make sure we load data correctly + pool.stop() + pool = self.useNodepool(configfile, watermark_sleep=1) + pool.start() + self.wait_for_config(pool) + manager = pool.getProviderManager('fake-provider') + manager._client.create_image(name="fake-image") + + # Delete the metastatic node and verify that backing is deleted + node1.state = zk.DELETING + self.zk.storeNode(node1) + self.waitForNodeDeletion(node1) + self.waitForNodeDeletion(bn1) + nodes = self._getNodes() + self.assertEqual(nodes, [])