From d972dc93cd4e2b8fdc66e1fd69a2e947de5810cc Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Wed, 7 Apr 2021 10:46:14 -0700 Subject: [PATCH] Ignored error state cache for new requests A possibility exists where inspector *can* fail upon inspection if the database connectivity was lost on a prior action. This is because the last database transport is potentially bad and fails upon load for the transaction. The cache can then end up with an "error" state entry, which upon retrying can fail becasue it is already in error state Because there really are no guarentees regarding database failures, the best thing to do is to not trust the prior cache state if it is in error and to reset it to starting upon new introspection requests. This prevents operators from *having* to perform process restarts to force all loads to be from the database unless they manage to have a multi-inspector cluster and get another inspector node to inspect in the mean time. Change-Id: I04ae1d54028862642d043f3a8f3af99405863325 Story: 2008344 Task: 41246 Related: rhbz#1947147 --- ironic_inspector/node_cache.py | 8 +++++++- ironic_inspector/test/unit/test_node_cache.py | 11 +++++++++++ .../fix-cache-error-on-start-27f492ba863d5f92.yaml | 7 +++++++ 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/fix-cache-error-on-start-27f492ba863d5f92.yaml diff --git a/ironic_inspector/node_cache.py b/ironic_inspector/node_cache.py index 01d8e6ff7..032f1f690 100644 --- a/ironic_inspector/node_cache.py +++ b/ironic_inspector/node_cache.py @@ -682,7 +682,13 @@ def start_introspection(uuid, **kwargs): node_info=node_info) state = istate.States.starting else: - state = node_info.state + recorded_state = node_info.state + if istate.States.error == recorded_state: + # If there was a failure, return to starting state to avoid + # letting the cache block new runs from occuring. + state = istate.States.starting + else: + state = recorded_state return add_node(uuid, state, **kwargs) diff --git a/ironic_inspector/test/unit/test_node_cache.py b/ironic_inspector/test/unit/test_node_cache.py index ab426256d..bd0d6cdea 100644 --- a/ironic_inspector/test/unit/test_node_cache.py +++ b/ironic_inspector/test/unit/test_node_cache.py @@ -1311,6 +1311,17 @@ class TestStartIntrospection(test_base.NodeTest): self.node_info.uuid) self.assertFalse(add_node_mock.called) + @prepare_mocks + def test_ensure_start_on_error(self, fsm_event_mock, + add_node_mock): + def side_effect(*args): + self.node_info._state = istate.States.error + + fsm_event_mock.side_effect = side_effect + node_cache.start_introspection(self.node.uuid) + add_node_mock.assert_called_once_with(self.node_info.uuid, + istate.States.starting) + class TestIntrospectionDataDbStore(test_base.NodeTest): def setUp(self): diff --git a/releasenotes/notes/fix-cache-error-on-start-27f492ba863d5f92.yaml b/releasenotes/notes/fix-cache-error-on-start-27f492ba863d5f92.yaml new file mode 100644 index 000000000..76a38f395 --- /dev/null +++ b/releasenotes/notes/fix-cache-error-on-start-27f492ba863d5f92.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Fixes an issue where a failed inspection due to a transient failure can + prevent retry attempts to inspect to be perceived as a failure. If a prior + inspection fails and is in ``error`` state, when a new introspection is + requested, the state is now appropriately set to ``starting``.