Set node "alive" when inspection finished
Make the node fast-trackable as soon as inspection finishes. In addition, add a wait for the agent to call back should it not be available when fast track is attempted. Closes-Bug: #2078820 Change-Id: I8a95fc08cf355b7b745a565e3a05c9dc0875a63e
This commit is contained in:
parent
3f22f8c5f0
commit
b07ebc4123
@ -95,6 +95,12 @@ def do_node_clean(task, clean_steps=None, disable_ramdisk=False):
|
||||
'out-of-band only cleaning has been requested for node '
|
||||
'%s', node.uuid)
|
||||
prepare_result = None
|
||||
except exception.AgentConnectionFailed:
|
||||
LOG.info('Agent is not yet running on node %(node)s, waiting for'
|
||||
' agent to come up for fast track', {'node': node.uuid})
|
||||
target_state = states.MANAGEABLE if manual_clean else None
|
||||
task.process_event('wait', target_state=target_state)
|
||||
return
|
||||
except Exception as e:
|
||||
msg = (_('Failed to prepare node %(node)s for cleaning: %(e)s')
|
||||
% {'node': node.uuid, 'e': e})
|
||||
|
@ -187,6 +187,11 @@ def do_node_deploy(task, conductor_id=None, configdrive=None,
|
||||
|
||||
try:
|
||||
task.driver.deploy.prepare(task)
|
||||
except exception.AgentConnectionFailed:
|
||||
LOG.info('Agent is not yet running on node %(node)s, waiting for agent'
|
||||
' to come up for fast track', {'node': node.uuid})
|
||||
task.process_event('wait')
|
||||
return
|
||||
except exception.IronicException as e:
|
||||
with excutils.save_and_reraise_exception():
|
||||
utils.deploying_error_handler(
|
||||
|
@ -1150,13 +1150,18 @@ def fast_track_able(task):
|
||||
def value_within_timeout(value, timeout):
|
||||
"""Checks if the time is within the previous timeout seconds from now.
|
||||
|
||||
:param value: a string representing date and time or None.
|
||||
:param value: a datetime or string representing date and time or None.
|
||||
:param timeout: timeout in seconds.
|
||||
"""
|
||||
# use native datetime objects for conversion and compare
|
||||
# slightly odd because py2 compatibility :(
|
||||
last = datetime.datetime.strptime(value or '1970-01-01T00:00:00.000000',
|
||||
"%Y-%m-%dT%H:%M:%S.%f")
|
||||
if isinstance(value, datetime.datetime):
|
||||
# Converts to an offset-naive datetime (as created by timeutils.utcnow())
|
||||
last = value.replace(tzinfo=None)
|
||||
else:
|
||||
defaultdt = '1970-01-01T00:00:00.000000'
|
||||
last = datetime.datetime.strptime(value or defaultdt,
|
||||
'%Y-%m-%dT%H:%M:%S.%f')
|
||||
# If we found nothing, we assume that the time is essentially epoch.
|
||||
time_delta = datetime.timedelta(seconds=timeout)
|
||||
last_valid = timeutils.utcnow() - time_delta
|
||||
@ -1173,14 +1178,20 @@ def agent_is_alive(node, timeout=None):
|
||||
:param node: A node object.
|
||||
:param timeout: Heartbeat timeout, defaults to `fast_track_timeout`.
|
||||
"""
|
||||
|
||||
timeout = timeout or CONF.deploy.fast_track_timeout
|
||||
if node.power_state == states.POWER_ON and \
|
||||
node.inspection_finished_at and \
|
||||
value_within_timeout(node.inspection_finished_at, timeout):
|
||||
return True
|
||||
|
||||
# If no agent_url is present then we have powered down since the
|
||||
# last agent heartbeat
|
||||
if not node.driver_internal_info.get('agent_url'):
|
||||
return False
|
||||
|
||||
return value_within_timeout(
|
||||
node.driver_internal_info.get('agent_last_heartbeat'),
|
||||
timeout or CONF.deploy.fast_track_timeout)
|
||||
node.driver_internal_info.get('agent_last_heartbeat'), timeout)
|
||||
|
||||
|
||||
def is_fast_track(task):
|
||||
|
@ -2238,6 +2238,18 @@ class FastTrackTestCase(db_base.DbTestCase):
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.assertFalse(conductor_utils.is_fast_track(task))
|
||||
|
||||
def test_is_fast_track_inspected_no_heartbeat(self, mock_get_power):
|
||||
mock_get_power.return_value = states.POWER_ON
|
||||
self.node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
uuid=uuidutils.generate_uuid(),
|
||||
inspection_finished_at=timeutils.utcnow(),
|
||||
power_state=states.POWER_ON
|
||||
)
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.assertTrue(conductor_utils.is_fast_track(task))
|
||||
|
||||
def test_is_fast_track_powered_after_heartbeat(self, mock_get_power):
|
||||
mock_get_power.return_value = states.POWER_ON
|
||||
with task_manager.acquire(
|
||||
|
@ -0,0 +1,7 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Set node "alive" and make it fast trackable
|
||||
as soon as inspection is finished. In addition,
|
||||
add a wait for the agent to call back should
|
||||
it not be available when fast track is attempted.
|
Loading…
Reference in New Issue
Block a user