Set node "alive" when inspection finished

Make the node fast-trackable as soon as inspection
finishes. In addition, add a wait for the agent to
call back should it not be available when fast track
is attempted.

Closes-Bug: #2078820

Change-Id: I8a95fc08cf355b7b745a565e3a05c9dc0875a63e
This commit is contained in:
Derek Higgins 2024-07-02 17:36:44 +01:00
parent 3f22f8c5f0
commit b07ebc4123
5 changed files with 46 additions and 5 deletions

View File

@ -95,6 +95,12 @@ def do_node_clean(task, clean_steps=None, disable_ramdisk=False):
'out-of-band only cleaning has been requested for node '
'%s', node.uuid)
prepare_result = None
except exception.AgentConnectionFailed:
LOG.info('Agent is not yet running on node %(node)s, waiting for'
' agent to come up for fast track', {'node': node.uuid})
target_state = states.MANAGEABLE if manual_clean else None
task.process_event('wait', target_state=target_state)
return
except Exception as e:
msg = (_('Failed to prepare node %(node)s for cleaning: %(e)s')
% {'node': node.uuid, 'e': e})

View File

@ -187,6 +187,11 @@ def do_node_deploy(task, conductor_id=None, configdrive=None,
try:
task.driver.deploy.prepare(task)
except exception.AgentConnectionFailed:
LOG.info('Agent is not yet running on node %(node)s, waiting for agent'
' to come up for fast track', {'node': node.uuid})
task.process_event('wait')
return
except exception.IronicException as e:
with excutils.save_and_reraise_exception():
utils.deploying_error_handler(

View File

@ -1150,13 +1150,18 @@ def fast_track_able(task):
def value_within_timeout(value, timeout):
"""Checks if the time is within the previous timeout seconds from now.
:param value: a string representing date and time or None.
:param value: a datetime or string representing date and time or None.
:param timeout: timeout in seconds.
"""
# use native datetime objects for conversion and compare
# slightly odd because py2 compatibility :(
last = datetime.datetime.strptime(value or '1970-01-01T00:00:00.000000',
"%Y-%m-%dT%H:%M:%S.%f")
if isinstance(value, datetime.datetime):
# Convert to an offset-naive datetime (as created by timeutils.utcnow())
last = value.replace(tzinfo=None)
else:
defaultdt = '1970-01-01T00:00:00.000000'
last = datetime.datetime.strptime(value or defaultdt,
'%Y-%m-%dT%H:%M:%S.%f')
# If we found nothing, we assume that the time is essentially epoch.
time_delta = datetime.timedelta(seconds=timeout)
last_valid = timeutils.utcnow() - time_delta
@ -1173,14 +1178,20 @@ def agent_is_alive(node, timeout=None):
:param node: A node object.
:param timeout: Heartbeat timeout, defaults to `fast_track_timeout`.
"""
timeout = timeout or CONF.deploy.fast_track_timeout
if node.power_state == states.POWER_ON and \
node.inspection_finished_at and \
value_within_timeout(node.inspection_finished_at, timeout):
return True
# If no agent_url is present then we have powered down since the
# last agent heartbeat
if not node.driver_internal_info.get('agent_url'):
return False
return value_within_timeout(
node.driver_internal_info.get('agent_last_heartbeat'),
timeout or CONF.deploy.fast_track_timeout)
node.driver_internal_info.get('agent_last_heartbeat'), timeout)
def is_fast_track(task):

View File

@ -2238,6 +2238,18 @@ class FastTrackTestCase(db_base.DbTestCase):
self.context, self.node.uuid, shared=False) as task:
self.assertFalse(conductor_utils.is_fast_track(task))
def test_is_fast_track_inspected_no_heartbeat(self, mock_get_power):
# Checks that is_fast_track() returns True for a powered-on node whose
# inspection has just finished. The test passes no agent heartbeat fields
# when creating the node.
# NOTE(review): presumably create_test_node() leaves agent_url and
# agent_last_heartbeat unset by default -- confirm against the test helpers.
mock_get_power.return_value = states.POWER_ON
# Create a new node with inspection_finished_at set to the current time
# and power_state set to POWER_ON.
self.node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
uuid=uuidutils.generate_uuid(),
inspection_finished_at=timeutils.utcnow(),
power_state=states.POWER_ON
)
# Acquire a non-shared task on the new node and assert it is fast track.
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
self.assertTrue(conductor_utils.is_fast_track(task))
def test_is_fast_track_powered_after_heartbeat(self, mock_get_power):
mock_get_power.return_value = states.POWER_ON
with task_manager.acquire(

View File

@ -0,0 +1,7 @@
---
fixes:
- |
Set node "alive" and make it fast-trackable
as soon as inspection is finished. In addition,
add a wait for the agent to call back should
it not be available when fast track is attempted.