Use uptime instead to verify controllers reboot has been completed
Use uptime instead of hostname command to validate that a node has been rebooted and the reboot is completed. Related-patch: https://review.opendev.org/c/x/tobiko/+/888442 Change-Id: Ib8c625fb385d8dff514dd7f83de122ab872a14c6
This commit is contained in:
parent
803b230905
commit
1cdce0c844
@ -107,18 +107,38 @@ def network_undisrupt_node(node_name, disrupt_method=undisrupt_network):
|
||||
|
||||
|
||||
def disrupt_node(node_name, disrupt_method=network_disruption):
|
||||
|
||||
# reboot all controllers and wait for ssh Up on them
|
||||
# hard reset is simultaneous while soft is sequential
|
||||
# method : method of disruption to use : network_disruption |
|
||||
# container_restart
|
||||
|
||||
start_time = tobiko.time()
|
||||
# using ssh_client.connect we use a fire and forget reboot method
|
||||
node = tripleo_topology.get_node(node_name)
|
||||
node.ssh_client.connect().exec_command(disrupt_method)
|
||||
LOG.info('disrupt exec: {} on server: {}'.format(disrupt_method,
|
||||
node.name))
|
||||
check_overcloud_node_responsive(node)
|
||||
|
||||
if isinstance(disrupt_method, sh.RebootHostMethod):
|
||||
check_overcloud_node_uptime(node.ssh_client, start_time)
|
||||
else:
|
||||
check_overcloud_node_responsive(node)
|
||||
|
||||
|
||||
def check_overcloud_node_uptime(ssh_client, start_time):
|
||||
for attempt in tobiko.retry(timeout=600., interval=10.):
|
||||
try:
|
||||
uptime = sh.get_uptime(ssh_client=ssh_client, timeout=15.)
|
||||
except (sh.ShellCommandFailed,
|
||||
sh.ShellTimeoutExpired,
|
||||
sh.ShellProcessTerminated):
|
||||
uptime = None
|
||||
|
||||
if uptime and uptime < (tobiko.time() - start_time):
|
||||
LOG.debug('Reboot has been completed')
|
||||
break
|
||||
else:
|
||||
attempt.check_limits()
|
||||
|
||||
|
||||
def reboot_node(node_name, wait=True, reboot_method=sh.hard_reset_method):
|
||||
@ -182,8 +202,10 @@ def disrupt_all_controller_nodes(disrupt_method=sh.hard_reset_method,
|
||||
if exclude_list:
|
||||
nodes = [node for node in nodes if node.name not in exclude_list]
|
||||
|
||||
start_time = {}
|
||||
for controller in nodes:
|
||||
if isinstance(disrupt_method, sh.RebootHostMethod):
|
||||
start_time[controller.name] = tobiko.time()
|
||||
reboot_node(controller.name, wait=sequentially,
|
||||
reboot_method=disrupt_method)
|
||||
else:
|
||||
@ -194,9 +216,14 @@ def disrupt_all_controller_nodes(disrupt_method=sh.hard_reset_method,
|
||||
tobiko.cleanup_fixture(controller.ssh_client)
|
||||
if sequentially:
|
||||
check_overcloud_node_responsive(controller)
|
||||
|
||||
if not sequentially:
|
||||
for controller in nodes:
|
||||
check_overcloud_node_responsive(controller)
|
||||
if isinstance(disrupt_method, sh.RebootHostMethod):
|
||||
check_overcloud_node_uptime(
|
||||
controller.ssh_client, start_time[controller.name])
|
||||
else:
|
||||
check_overcloud_node_responsive(controller)
|
||||
|
||||
|
||||
def reboot_all_controller_nodes(reboot_method=sh.hard_reset_method,
|
||||
@ -215,7 +242,9 @@ def reboot_all_controller_nodes(reboot_method=sh.hard_reset_method,
|
||||
if exclude_list:
|
||||
nodes = [node for node in nodes if node.name not in exclude_list]
|
||||
|
||||
start_time = {}
|
||||
for controller in nodes:
|
||||
start_time[controller.name] = tobiko.time()
|
||||
sh.reboot_host(ssh_client=controller.ssh_client, wait=sequentially,
|
||||
method=reboot_method)
|
||||
LOG.info('reboot exec: {} on server: {}'.format(reboot_method,
|
||||
@ -223,7 +252,8 @@ def reboot_all_controller_nodes(reboot_method=sh.hard_reset_method,
|
||||
tobiko.cleanup_fixture(controller.ssh_client)
|
||||
if not sequentially:
|
||||
for controller in nodes:
|
||||
check_overcloud_node_responsive(controller)
|
||||
check_overcloud_node_uptime(
|
||||
controller.ssh_client, start_time[controller.name])
|
||||
|
||||
|
||||
def is_ipv6addr_main_vip():
|
||||
|
Loading…
Reference in New Issue
Block a user