Fix unknown label in metastatic driver

If a label is removed from the configuration while a node still
exists, the periodic cleanup performed by the metastatic driver
will raise an AttributeError exception when trying to access
the grace_time attribute, since the label pointer is None.

To address this, treat a missing label as if it has a grace time
of zero seconds.

This adds a test which simulates the issue.

This also adds an extra log entry for when a metastatic backing
node slot is deallocated is added so that we log both the allocation
and deallocation.

Change-Id: I0c104a2fe9874e2cd30e2bf2f2227569a73be243
This commit is contained in:
James E. Blair 2022-06-16 11:27:33 -07:00
parent 5c4ca2f8a1
commit 1b1eab77b0
2 changed files with 51 additions and 3 deletions

View File

@ -245,6 +245,7 @@ class BackingNodeRecord:
idx = self.allocated_nodes.index(node_id)
self.allocated_nodes[idx] = None
self.last_used = time.time()
return idx
def backsNode(self, node_id):
return node_id in self.allocated_nodes
@ -287,8 +288,14 @@ class MetastaticAdapter(statemachine.Adapter):
self.backing_node_records.items():
for bnr in backing_node_records[:]:
label_config = self.provider._getLabel(bnr.label_name)
if label_config:
grace_time = label_config.grace_time
else:
# The label doesn't exist in our config any more,
# it must have been removed.
grace_time = 0
if (bnr.isEmpty() and
now - bnr.last_used > label_config.grace_time):
now - bnr.last_used > grace_time):
self.log.info("Backing node %s has been idle for "
"%s seconds, releasing",
bnr.node_id, now - bnr.last_used)
@ -396,7 +403,10 @@ class MetastaticAdapter(statemachine.Adapter):
self.backing_node_records.items():
for bn in backing_node_records:
if bn.backsNode(node_id):
bn.deallocateSlot(node_id)
slot = bn.deallocateSlot(node_id)
self.log.info(
"Unassigned node %s from backing node %s slot %s",
node_id, bn.node_id, slot)
return
def _checkBackingNodeRequests(self):

View File

@ -13,8 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import json
import logging
import os
import testtools
@ -180,3 +181,40 @@ class TestDriverMetastatic(tests.DBTestCase):
self.assertEqual(nodes, [node1, bn1, node2, node3, bn2])
self.assertEqual(bn2.id, node3.driver_data['backing_node'])
self.assertNotEqual(bn1.id, bn2.id)
def test_metastatic_config_change(self):
configfile = self.setup_config('metastatic.yaml')
pool = self.useNodepool(configfile, watermark_sleep=1)
pool.start()
self.wait_for_config(pool)
manager = pool.getProviderManager('fake-provider')
manager._client.create_image(name="fake-image")
# Request a node, verify that there is a backing node, and it
# has the same connection info
node1 = self._requestNode()
nodes = self._getNodes()
bn1 = nodes[1]
self.assertEqual(nodes, [node1, bn1])
# Update the node to indicate it was for a non-existent label
user_data = json.loads(bn1.user_data)
user_data['label'] = 'old-label'
bn1.user_data = json.dumps(user_data)
self.zk.storeNode(bn1)
# Restart the provider and make sure we load data correctly
pool.stop()
pool = self.useNodepool(configfile, watermark_sleep=1)
pool.start()
self.wait_for_config(pool)
manager = pool.getProviderManager('fake-provider')
manager._client.create_image(name="fake-image")
# Delete the metastatic node and verify that backing is deleted
node1.state = zk.DELETING
self.zk.storeNode(node1)
self.waitForNodeDeletion(node1)
self.waitForNodeDeletion(bn1)
nodes = self._getNodes()
self.assertEqual(nodes, [])