Increase collect ssh, scp and sudo expect operation timeouts
The collect operation has been seen to fail with a timeout error when collecting from remote hosts over a high latency network. This update consolidates the collect timeouts into a separate sourced include file '/etc/collect/collect_timeouts'. The ssh, scp and sudo timeouts were seen to vary from function to function. Since the timeout is always waiting for a password prompt this update normalizes them all to 60 seconds. Move additional miscellaneous timeouts to the timeouts file giving them opportunity to be configurable in the future. Test Plan: High latency is 1200 ms PASS: Verify collect system hosts on typical network PASS: Verify collect multiple subclouds on typical network PASS: Verify collect system hosts on high latency network PASS: Verify collect multiple subclouds on high latency network PASS: Verify collect subcloud with persistent long delays ... 1200ms, 1500ms, 2000ms, 300ms and 5000ms PASS: Verify that the new collect timeouts file can be modified and those modified values used in subsequent collect operations PASS: High latency collect soak (10 iterations) Closes-Bug: 2023554 Change-Id: I6fa318eea35c175d01646d93220637e95efd29e1 Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
parent
d56d82ce4c
commit
29fb1c4435
tools/collector
@ -207,6 +207,7 @@ pw=""
|
||||
|
||||
# pull in common utils and environment
|
||||
source /usr/local/sbin/collect_utils
|
||||
source /etc/collect/collect_timeouts
|
||||
|
||||
declare -i RETVAL=${FAIL}
|
||||
function collect_exit()
|
||||
@ -274,12 +275,25 @@ trap cleanup EXIT # clean exit
|
||||
# 1 = show expect output
|
||||
USER_LOG_MODE=0
|
||||
|
||||
# Set the default collect host timeout
|
||||
COLLECT_HOST_TIMEOUT=${COLLECT_HOST_TIMEOUT_DEFAULT}
|
||||
|
||||
# Set the default timeout for creating the final collect tarball
|
||||
CREATE_TARBALL_TIMEOUT=${CREATE_TARBALL_TIMEOUT_DEFAULT}
|
||||
|
||||
# set the default sudo timeout
|
||||
SUDO_TIMEOUT=${SUDO_TIMEOUT_DEFAULT}
|
||||
|
||||
# limit scp bandwidth to 1MB/s
|
||||
# increase limit of scp bandwidth from 1MB/s to 10MB/s
|
||||
SCP_CMD="scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PreferredAuthentications=password -o PubkeyAuthentication=no -l $((10*8*1000))"
|
||||
SCP_TIMEOUT="600"
|
||||
SCP_TIMEOUT="${SCP_TIMEOUT_DEFAULT}"
|
||||
|
||||
SSH_CMD="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PreferredAuthentications=password -o PubkeyAuthentication=no"
|
||||
SSH_TIMEOUT=${SSH_TIMEOUT_DEFAULT}
|
||||
|
||||
NOWDATE=$(date +"%Y%m%d.%H%M%S")
|
||||
|
||||
COLLECT_BASE_DIR="/scratch"
|
||||
collect_host="/usr/local/sbin/collect_host"
|
||||
collect="/usr/local/sbin/collect"
|
||||
@ -425,15 +439,6 @@ COLLECT_CONTINUE_MSG_NEEDED=false
|
||||
SUBCLOUD_COLLECT_CONTINUE=false
|
||||
SUBCLOUD_COLLECT_CONTINUE_LIST_FILE="/tmp/collect_continue.lst"
|
||||
|
||||
declare -i TIMEOUT_MIN_MINS=10
|
||||
declare -i TIMEOUT_MAX_MINS=120
|
||||
declare -i TIMEOUT_DEF_MINS=20
|
||||
declare -i TIMEOUT_MIN_SECS=$(($TIMEOUT_MAX_MINS*60))
|
||||
declare -i TIMEOUT_MAX_SECS=$(($TIMEOUT_MAX_MINS*60))
|
||||
declare -i TIMEOUT_DEF_SECS=$(($TIMEOUT_DEF_MINS*60)) # 20 minutes
|
||||
|
||||
# overall collect timeout
|
||||
declare -i TIMEOUT=${TIMEOUT_DEF_SECS}
|
||||
SECONDS=0
|
||||
|
||||
COLLECT_NAME=""
|
||||
@ -1137,7 +1142,7 @@ function passwordless_sudo_test()
|
||||
/usr/bin/expect << EOF
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
set timeout 60
|
||||
set timeout ${SUDO_TIMEOUT}
|
||||
expect -re $
|
||||
send "sudo cat /usr/local/sbin/expect_done\n"
|
||||
expect {
|
||||
@ -1186,7 +1191,7 @@ function check_host_reachable()
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
expect -re $
|
||||
set timeout 60
|
||||
set timeout ${SSH_TIMEOUT}
|
||||
send "${SSH_CMD} ${UN}@${hostname} cat ${cmd_done_file}\n"
|
||||
expect {
|
||||
"assword:" {
|
||||
@ -1246,7 +1251,7 @@ function clean_scratch_dir_local ()
|
||||
/usr/bin/expect << EOF
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
set timeout 60
|
||||
set timeout ${SUDO_TIMEOUT}
|
||||
expect -re $
|
||||
send -- "sudo rm -rf ${directory}/*_????????.??????* ; cat ${cmd_done_file}\n"
|
||||
expect {
|
||||
@ -1285,14 +1290,14 @@ function clean_scratch_dir_remote()
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
expect -re $
|
||||
set timeout 60
|
||||
set timeout ${SSH_TIMEOUT}
|
||||
send "${SSH_CMD} ${UN}@${this_hostname}\n"
|
||||
expect {
|
||||
"assword:" {
|
||||
send "${pw}\r"
|
||||
expect {
|
||||
"${this_hostname}" {
|
||||
set timeout 30
|
||||
set timeout ${SUDO_TIMEOUT}
|
||||
expect -re $
|
||||
send "sudo rm -rf ${directory}/*_????????.??????* ; cat ${cmd_done_file}\n"
|
||||
expect {
|
||||
@ -1361,7 +1366,7 @@ function delete_remote_dir_or_file()
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
expect -re $
|
||||
set timeout 60
|
||||
set timeout ${SSH_TIMEOUT}
|
||||
send "${SSH_CMD} ${UN}@${remote_hostname}\n"
|
||||
expect {
|
||||
"assword:" {
|
||||
@ -1371,7 +1376,7 @@ function delete_remote_dir_or_file()
|
||||
"${login_prompt}" {}
|
||||
"${alt_login_prompt}" {}
|
||||
}
|
||||
set timeout 10
|
||||
set timeout ${SUDO_TIMEOUT}
|
||||
expect -re $
|
||||
send "sudo rm -rf ${dir_or_file} ; cat ${cmd_done_file}\n"
|
||||
expect {
|
||||
@ -1540,7 +1545,7 @@ function create_collect_dir_local()
|
||||
/usr/bin/expect << EOF
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
set timeout 10
|
||||
set timeout ${SUDO_TIMEOUT}
|
||||
expect -re $
|
||||
send "sudo mkdir -m 775 -p ${dir} ; cat ${cmd_done_file}\n"
|
||||
expect {
|
||||
@ -1596,7 +1601,7 @@ function remove_file_local()
|
||||
/usr/bin/expect << EOF
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
set timeout 10
|
||||
set timeout ${SUDO_TIMEOUT}
|
||||
expect -re $
|
||||
send -- "sudo rm -f ${local_file} ; cat ${cmd_done_file}\n"
|
||||
expect {
|
||||
@ -1633,7 +1638,7 @@ function remove_dir_local()
|
||||
/usr/bin/expect << EOF
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
set timeout 10
|
||||
set timeout ${SUDO_TIMEOUT}
|
||||
expect -re $
|
||||
send -- "sudo rm -rf ${dir} ; cat ${cmd_done_file}\n"
|
||||
expect {
|
||||
@ -1672,7 +1677,7 @@ function move_file_local()
|
||||
/usr/bin/expect << EOF
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
set timeout 10
|
||||
set timeout ${SUDO_TIMEOUT}
|
||||
expect -re $
|
||||
send -- "sudo mv ${src} ${dst} ; cat ${cmd_done_file}\n"
|
||||
expect {
|
||||
@ -1832,7 +1837,7 @@ EOF
|
||||
trap exit {SIGINT SIGTERM}
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
set timeout 30
|
||||
set timeout ${SSH_TIMEOUT}
|
||||
expect -re $
|
||||
send "${SSH_CMD} ${UN}@${host}\n"
|
||||
expect {
|
||||
@ -1840,7 +1845,7 @@ EOF
|
||||
send "${pw}\r"
|
||||
expect {
|
||||
"${host}:" {
|
||||
set timeout 600
|
||||
set timeout ${COLLECT_HOST_TIMEOUT}
|
||||
send "sudo SKIP_MASK=${SKIP_MASK} ${collect_host} ${TARNAME} ${STARTDATE_OPTION} ${STARTDATE} ${STARTTIME} ${ENDDATE_OPTION} ${ENDDATE} ${ENDTIME} ${VERBOSE} ${INVENTORY}\n"
|
||||
expect {
|
||||
"assword:" {
|
||||
@ -1972,7 +1977,7 @@ function collect_subcloud_run()
|
||||
trap exit {SIGINT SIGTERM}
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
set timeout 30
|
||||
set timeout ${SSH_TIMEOUT}
|
||||
expect -re $
|
||||
send "${SSH_CMD} ${UN}@${subcloud}\n"
|
||||
expect {
|
||||
@ -2463,7 +2468,7 @@ function collect_subcloud_clean()
|
||||
trap exit {SIGINT SIGTERM}
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
set timeout 30
|
||||
set timeout ${SSH_TIMEOUT}
|
||||
expect -re $
|
||||
send "${SSH_CMD} ${UN}@${subcloud}\n"
|
||||
expect {
|
||||
@ -3218,7 +3223,7 @@ echo -n "creating ${COLLECT_TYPE} tarball ${TARBALL_NAME} ... "
|
||||
log_user ${USER_LOG_MODE}
|
||||
spawn bash -i
|
||||
expect -re $
|
||||
set timeout 200
|
||||
set timeout ${CREATE_TARBALL_TIMEOUT}
|
||||
send "(cd ${COLLECT_BASE_DIR} ; sudo ${IONICE_CMD} ${NICE_CMD} ${TAR_CMD_APPEND} ${TARBALL_NAME} --remove-files ${COLLECT_NAME}/* 2>>${COLLECT_ERROR_LOG} ; cat ${cmd_done_file})\n"
|
||||
expect {
|
||||
"assword:" {
|
||||
|
27
tools/collector/debian-scripts/collect_timeouts
Normal file
27
tools/collector/debian-scripts/collect_timeouts
Normal file
@ -0,0 +1,27 @@
|
||||
#! /bin/bash
#
# Copyright (c) 2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
##########################################################################################
#
# Default timeout values for the collect tool.
#
# This file is sourced by the collect script, which copies each *_DEFAULT
# value into its working variable (SCP_TIMEOUT, SSH_TIMEOUT, SUDO_TIMEOUT,
# COLLECT_HOST_TIMEOUT, CREATE_TARBALL_TIMEOUT) and passes those to the
# expect 'set timeout' commands.
#
# All values are declared as integers (declare -i).
#
##########################################################################################

# default timeouts for collect ; in seconds
declare -i SCP_TIMEOUT_DEFAULT=600              # scp operations
declare -i SSH_TIMEOUT_DEFAULT=60               # ssh operations
declare -i SUDO_TIMEOUT_DEFAULT=60              # sudo operations
declare -i COLLECT_HOST_TIMEOUT_DEFAULT=600     # running collect_host on a host
declare -i CREATE_TARBALL_TIMEOUT_DEFAULT=200   # creating the final collect tarball

# overall collect timeout limits and default ; in minutes
declare -i TIMEOUT_MIN_MINS=10
declare -i TIMEOUT_MAX_MINS=120
declare -i TIMEOUT_DEF_MINS=20

# the same limits converted to seconds
# Note: the minimum is derived from TIMEOUT_MIN_MINS ; deriving it from
#       TIMEOUT_MAX_MINS would make the minimum equal to the maximum.
# shellcheck disable=SC2034
declare -i TIMEOUT_MIN_SECS=$((TIMEOUT_MIN_MINS*60)) # 10 minutes
# shellcheck disable=SC2034
declare -i TIMEOUT_MAX_SECS=$((TIMEOUT_MAX_MINS*60)) # 120 minutes
declare -i TIMEOUT_DEF_SECS=$((TIMEOUT_DEF_MINS*60)) # 20 minutes

# overall collect timeout ; in seconds
declare -i TIMEOUT=${TIMEOUT_DEF_SECS}
|
||||
|
@ -26,6 +26,7 @@ override_dh_auto_install:
|
||||
install -m 755 -p collect_date $(ROOT)/usr/local/sbin/collect_date
|
||||
install -m 755 -p collect_utils $(ROOT)/usr/local/sbin/collect_utils
|
||||
install -m 755 -p collect_parms $(ROOT)/usr/local/sbin/collect_parms
|
||||
install -m 755 -p collect_timeouts $(SYSCONFDIR)/collect/collect_timeouts
|
||||
install -m 755 -p collect_mask_passwords $(ROOT)/usr/local/sbin/collect_mask_passwords
|
||||
install -m 755 -p expect_done $(ROOT)/usr/local/sbin/expect_done
|
||||
install -m 755 -p mariadb-cli.sh $(ROOT)/usr/local/sbin/mariadb-cli
|
||||
|
Loading…
x
Reference in New Issue
Block a user