From 4287951d711b94972abd13bac6dcbd7250e0867e Mon Sep 17 00:00:00 2001 From: Derek Higgins Date: Wed, 3 Feb 2021 16:00:57 +0000 Subject: [PATCH] Don't mark an agent as alive if rebooted If 'agent_url' has been cleared from internal_info it indicates that the node has been powered off. Change-Id: Idba486c98e1e92d35fca2e2d156866566acb9e40 Story: 2008583 Task: 41736 --- ironic/conductor/utils.py | 5 +++++ ironic/tests/unit/conductor/test_utils.py | 11 ++++++++++- .../notes/agent-rebooted-fab20d012fe6cbe8.yaml | 6 ++++++ 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/agent-rebooted-fab20d012fe6cbe8.yaml diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index d0e2bbd157..f251dbff91 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -1042,6 +1042,11 @@ def agent_is_alive(node, timeout=None): :param node: A node object. :param timeout: Heartbeat timeout, defaults to `fast_track_timeout`. """ + # If no agent_url is present then we have powered down since the + # last agent heartbeat + if not node.driver_internal_info.get('agent_url'): + return False + return value_within_timeout( node.driver_internal_info.get('agent_last_heartbeat'), timeout or CONF.deploy.fast_track_timeout) diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index 8d0bf5f35c..5954ef266a 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -1934,7 +1934,8 @@ class FastTrackTestCase(db_base.DbTestCase): self.context, driver='fake-hardware', uuid=uuidutils.generate_uuid(), driver_internal_info={ - 'agent_last_heartbeat': str(timeutils.utcnow().isoformat())}) + 'agent_last_heartbeat': str(timeutils.utcnow().isoformat()), + 'agent_url': 'a_url'}) self.config(fast_track=True, group='deploy') def test_is_fast_track(self, mock_get_power): @@ -1966,6 +1967,14 @@ class FastTrackTestCase(db_base.DbTestCase): self.context, self.node.uuid, shared=False) as task: self.assertFalse(conductor_utils.is_fast_track(task)) + def test_is_fast_track_powered_after_heartbeat(self, mock_get_power): + mock_get_power.return_value = states.POWER_ON + with task_manager.acquire( + self.context, self.node.uuid, shared=False) as task: + conductor_utils.node_power_action(task, states.POWER_OFF) + conductor_utils.node_power_action(task, states.POWER_ON) + self.assertFalse(conductor_utils.is_fast_track(task)) + def test_is_fast_track_error_blocks(self, mock_get_power): mock_get_power.return_value = states.POWER_ON self.node.last_error = "bad things happened" diff --git a/releasenotes/notes/agent-rebooted-fab20d012fe6cbe8.yaml b/releasenotes/notes/agent-rebooted-fab20d012fe6cbe8.yaml new file mode 100644 index 0000000000..863eb5191f --- /dev/null +++ b/releasenotes/notes/agent-rebooted-fab20d012fe6cbe8.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fixes fast-track to prevent marking the agent as alive if + trying to rebuild a node before the fast-track timeout has + expired.