From 4481031d71339336bc1d3832e818018e4403e292 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Tue, 28 Feb 2023 13:28:49 -0800 Subject: [PATCH] Wipe Agent Token when cleaning timeout occurs In a relatively odd turn of events, should cleaning have started, but then timed out due to lost communications or a hard failure of the machine, an agent token could previously be orphaned, preventing re-cleaning. We now explicitly remove the token in this case. Change-Id: I236cdf6ddb040284e9fd1fa10136ad17ef665638 (cherry picked from commit 47b5909486c336352c536eb2cadd121afea8cf12) --- ironic/conductor/utils.py | 4 ++-- ironic/tests/unit/conductor/test_utils.py | 6 ++++++ ...agent-token-upon-cleaning-timeout-c9add514fad1b02c.yaml | 7 +++++++ 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/wipe-agent-token-upon-cleaning-timeout-c9add514fad1b02c.yaml diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index cdf3a99ee5..868e2d5b09 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -481,9 +481,9 @@ def cleaning_error_handler(task, logmsg, errmsg=None, traceback=False, node.del_driver_internal_info('cleaning_reboot') node.del_driver_internal_info('cleaning_polling') node.del_driver_internal_info('skip_current_clean_step') - # We don't need to keep the old agent URL + # We don't need to keep the old agent URL, or token # as it should change upon the next cleaning attempt. - node.del_driver_internal_info('agent_url') + wipe_token_and_url(task) # For manual cleaning, the target provision state is MANAGEABLE, whereas # for automated cleaning, it is AVAILABLE. 
manual_clean = node.target_provision_state == states.MANAGEABLE diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index a29da21a71..e190358d23 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -1172,6 +1172,9 @@ class ErrorHandlersTestCase(db_base.DbTestCase): self.node.set_driver_internal_info('skip_current_clean_step', True) self.node.set_driver_internal_info('clean_step_index', 0) self.node.set_driver_internal_info('agent_url', 'url') + self.node.set_driver_internal_info('agent_secret_token', 'foo') + self.node.set_driver_internal_info('agent_secret_token_pregenerated', + False) msg = 'error bar' last_error = "last error" @@ -1184,6 +1187,9 @@ class ErrorHandlersTestCase(db_base.DbTestCase): self.assertNotIn('cleaning_polling', self.node.driver_internal_info) self.assertNotIn('skip_current_clean_step', self.node.driver_internal_info) + self.assertNotIn('agent_secret_token', self.node.driver_internal_info) + self.assertNotIn('agent_secret_token_pregenerated', + self.node.driver_internal_info) self.assertEqual(last_error, self.node.last_error) self.assertTrue(self.node.maintenance) self.assertEqual(last_error, self.node.maintenance_reason) diff --git a/releasenotes/notes/wipe-agent-token-upon-cleaning-timeout-c9add514fad1b02c.yaml b/releasenotes/notes/wipe-agent-token-upon-cleaning-timeout-c9add514fad1b02c.yaml new file mode 100644 index 0000000000..0aa828ccda --- /dev/null +++ b/releasenotes/notes/wipe-agent-token-upon-cleaning-timeout-c9add514fad1b02c.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Fixes an issue where an agent token was being orphaned if a baremetal node + timed out during cleaning operations, leading to issues where the node + would not be able to establish a new token with Ironic upon future + cleaning attempts in some cases. We now always wipe the token in this case.