Wipe Agent Token when cleaning timeout occurs

In a relatively odd turn of events, should cleaning
have started but then timed out due to lost communications
or a hard failure of the machine, an agent token could
previously be orphaned, preventing re-cleaning.

We now explicitly remove the token in this case.

Change-Id: I236cdf6ddb040284e9fd1fa10136ad17ef665638
This commit is contained in:
Julia Kreger 2023-02-28 13:28:49 -08:00
parent 75c05be8a7
commit 47b5909486
3 changed files with 15 additions and 2 deletions

View File

@ -479,9 +479,9 @@ def cleaning_error_handler(task, logmsg, errmsg=None, traceback=False,
node.del_driver_internal_info('cleaning_reboot')
node.del_driver_internal_info('cleaning_polling')
node.del_driver_internal_info('skip_current_clean_step')
# We don't need to keep the old agent URL
# We don't need to keep the old agent URL, or token
# as it should change upon the next cleaning attempt.
node.del_driver_internal_info('agent_url')
wipe_token_and_url(task)
# For manual cleaning, the target provision state is MANAGEABLE, whereas
# for automated cleaning, it is AVAILABLE.
manual_clean = node.target_provision_state == states.MANAGEABLE

View File

@ -1150,6 +1150,9 @@ class ErrorHandlersTestCase(db_base.DbTestCase):
self.node.set_driver_internal_info('skip_current_clean_step', True)
self.node.set_driver_internal_info('clean_step_index', 0)
self.node.set_driver_internal_info('agent_url', 'url')
self.node.set_driver_internal_info('agent_secret_token', 'foo')
self.node.set_driver_internal_info('agent_secret_token_pregenerated',
False)
msg = 'error bar'
last_error = "last error"
@ -1162,6 +1165,9 @@ class ErrorHandlersTestCase(db_base.DbTestCase):
self.assertNotIn('cleaning_polling', self.node.driver_internal_info)
self.assertNotIn('skip_current_clean_step',
self.node.driver_internal_info)
self.assertNotIn('agent_secret_token', self.node.driver_internal_info)
self.assertNotIn('agent_secret_token_pregenerated',
self.node.driver_internal_info)
self.assertEqual(last_error, self.node.last_error)
self.assertTrue(self.node.maintenance)
self.assertEqual(last_error, self.node.maintenance_reason)

View File

@ -0,0 +1,7 @@
---
fixes:
- |
Fixes an issue where an agent token was being orphaned if a baremetal node
timed out during cleaning operations, leading to issues where the node
would not be able to establish a new token with Ironic upon future
cleaning attempts in some cases. We now always wipe the token in this case.