From 29fb1c44353b1301868095030603f09642bf438f Mon Sep 17 00:00:00 2001
From: Eric MacDonald <eric.macdonald@windriver.com>
Date: Tue, 13 Jun 2023 23:34:04 +0000
Subject: [PATCH] Increase collect ssh, scp and sudo expect operation timeouts

The collect operation has been seen to fail with a timeout error
when collecting from remote hosts over a high latency network.

This update consolidates the collect timeouts into a separate
sourced file, '/etc/collect/collect_timeouts'.

The ssh, scp and sudo timeouts were seen to vary from function
to function. Since the timeout is always waiting for a password
prompt, this update normalizes them all to 60 seconds.

Move additional miscellaneous timeouts to the timeouts file,
giving them the opportunity to be configurable in the future.

Test Plan: High latency is 1200 ms

PASS: Verify collect system hosts on typical network
PASS: Verify collect multiple subclouds on typical network
PASS: Verify collect system hosts on high latency network
PASS: Verify collect multiple subclouds on high latency network
PASS: Verify collect subcloud with persistent long delays
      ... 1200ms, 1500ms, 2000ms, 300ms and 5000ms
PASS: Verify that the new collect timeouts file can be modified and
      those modified values used in subsequent collect operations
PASS: High latency collect soak (10 iterations)

Closes-Bug: 2023554
Change-Id: I6fa318eea35c175d01646d93220637e95efd29e1
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
---
 tools/collector/debian-scripts/collect        | 57 ++++++++++---------
 .../collector/debian-scripts/collect_timeouts | 27 +++++++++
 tools/collector/debian/deb_folder/rules       |  1 +
 3 files changed, 59 insertions(+), 26 deletions(-)
 create mode 100644 tools/collector/debian-scripts/collect_timeouts

diff --git a/tools/collector/debian-scripts/collect b/tools/collector/debian-scripts/collect
index c23a3c18..ac1e493a 100644
--- a/tools/collector/debian-scripts/collect
+++ b/tools/collector/debian-scripts/collect
@@ -207,6 +207,7 @@ pw=""
 
 # pull in common utils and environment
 source /usr/local/sbin/collect_utils
+source /etc/collect/collect_timeouts
 
 declare -i RETVAL=${FAIL}
 function collect_exit()
@@ -274,12 +275,25 @@ trap cleanup EXIT                 # clean exit
 # 1 = show expect outout
 USER_LOG_MODE=0
 
+# Set the default collect host timeout
+COLLECT_HOST_TIMEOUT=${COLLECT_HOST_TIMEOUT_DEFAULT}
+
+# Set the default timeout for creating the final collect tarball
+CREATE_TARBALL_TIMEOUT=${CREATE_TARBALL_TIMEOUT_DEFAULT}
+
+# Set the default sudo timeout
+SUDO_TIMEOUT=${SUDO_TIMEOUT_DEFAULT}
+
 # limit scp bandwidth to 1MB/s
 # increase limit of scp bandwidth from 1MB/s to 10MB/s
 SCP_CMD="scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PreferredAuthentications=password -o PubkeyAuthentication=no -l $((10*8*1000))"
-SCP_TIMEOUT="600"
+SCP_TIMEOUT="${SCP_TIMEOUT_DEFAULT}"
+
 SSH_CMD="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PreferredAuthentications=password -o PubkeyAuthentication=no"
+SSH_TIMEOUT=${SSH_TIMEOUT_DEFAULT}
+
 NOWDATE=$(date +"%Y%m%d.%H%M%S")
+
 COLLECT_BASE_DIR="/scratch"
 collect_host="/usr/local/sbin/collect_host"
 collect="/usr/local/sbin/collect"
@@ -425,15 +439,6 @@ COLLECT_CONTINUE_MSG_NEEDED=false
 SUBCLOUD_COLLECT_CONTINUE=false
 SUBCLOUD_COLLECT_CONTINUE_LIST_FILE="/tmp/collect_continue.lst"
 
-declare -i TIMEOUT_MIN_MINS=10
-declare -i TIMEOUT_MAX_MINS=120
-declare -i TIMEOUT_DEF_MINS=20
-declare -i TIMEOUT_MIN_SECS=$(($TIMEOUT_MAX_MINS*60))
-declare -i TIMEOUT_MAX_SECS=$(($TIMEOUT_MAX_MINS*60))
-declare -i TIMEOUT_DEF_SECS=$(($TIMEOUT_DEF_MINS*60)) # 20 minutes
-
-# overall collect timeout
-declare -i TIMEOUT=${TIMEOUT_DEF_SECS}
 SECONDS=0
 
 COLLECT_NAME=""
@@ -1137,7 +1142,7 @@ function passwordless_sudo_test()
 /usr/bin/expect  << EOF
     log_user ${USER_LOG_MODE}
     spawn bash -i
-    set timeout 60
+    set timeout ${SUDO_TIMEOUT}
     expect -re $
     send "sudo cat /usr/local/sbin/expect_done\n"
     expect {
@@ -1186,7 +1191,7 @@ function check_host_reachable()
     log_user ${USER_LOG_MODE}
     spawn bash -i
     expect -re $
-    set timeout 60
+    set timeout ${SSH_TIMEOUT}
     send "${SSH_CMD} ${UN}@${hostname} cat ${cmd_done_file}\n"
     expect {
         "assword:" {
@@ -1246,7 +1251,7 @@ function clean_scratch_dir_local ()
 /usr/bin/expect << EOF
     log_user ${USER_LOG_MODE}
     spawn bash -i
-    set timeout 60
+    set timeout ${SUDO_TIMEOUT}
     expect -re $
     send -- "sudo rm -rf ${directory}/*_????????.??????* ; cat ${cmd_done_file}\n"
     expect {
@@ -1285,14 +1290,14 @@ function clean_scratch_dir_remote()
     log_user ${USER_LOG_MODE}
     spawn bash -i
     expect -re $
-    set timeout 60
+    set timeout ${SSH_TIMEOUT}
     send "${SSH_CMD} ${UN}@${this_hostname}\n"
     expect {
         "assword:" {
             send "${pw}\r"
             expect {
                 "${this_hostname}" {
-                    set timeout 30
+                    set timeout ${SUDO_TIMEOUT}
                     expect -re $
                     send "sudo rm -rf ${directory}/*_????????.??????* ; cat ${cmd_done_file}\n"
                     expect {
@@ -1361,7 +1366,7 @@ function delete_remote_dir_or_file()
     log_user ${USER_LOG_MODE}
     spawn bash -i
     expect -re $
-    set timeout 60
+    set timeout ${SSH_TIMEOUT}
     send "${SSH_CMD} ${UN}@${remote_hostname}\n"
     expect {
         "assword:" {
@@ -1371,7 +1376,7 @@ function delete_remote_dir_or_file()
                 "${login_prompt}" {}
                 "${alt_login_prompt}" {}
             }
-            set timeout 10
+            set timeout ${SUDO_TIMEOUT}
             expect -re $
             send "sudo rm -rf ${dir_or_file} ; cat ${cmd_done_file}\n"
             expect {
@@ -1540,7 +1545,7 @@ function create_collect_dir_local()
 /usr/bin/expect << EOF
     log_user ${USER_LOG_MODE}
     spawn bash -i
-    set timeout 10
+    set timeout ${SUDO_TIMEOUT}
     expect -re $
     send "sudo mkdir -m 775 -p ${dir} ; cat ${cmd_done_file}\n"
     expect {
@@ -1596,7 +1601,7 @@ function remove_file_local()
 /usr/bin/expect << EOF
     log_user ${USER_LOG_MODE}
     spawn bash -i
-    set timeout 10
+    set timeout ${SUDO_TIMEOUT}
     expect -re $
     send -- "sudo rm -f ${local_file} ; cat ${cmd_done_file}\n"
     expect {
@@ -1633,7 +1638,7 @@ function remove_dir_local()
 /usr/bin/expect << EOF
     log_user ${USER_LOG_MODE}
     spawn bash -i
-    set timeout 10
+    set timeout ${SUDO_TIMEOUT}
     expect -re $
     send -- "sudo rm -rf ${dir} ; cat ${cmd_done_file}\n"
     expect {
@@ -1672,7 +1677,7 @@ function move_file_local()
 /usr/bin/expect << EOF
     log_user ${USER_LOG_MODE}
     spawn bash -i
-    set timeout 10
+    set timeout ${SUDO_TIMEOUT}
     expect -re $
     send -- "sudo mv ${src} ${dst} ; cat ${cmd_done_file}\n"
     expect {
@@ -1832,7 +1837,7 @@ EOF
         trap exit {SIGINT SIGTERM}
         log_user ${USER_LOG_MODE}
         spawn bash -i
-        set timeout 30
+        set timeout ${SSH_TIMEOUT}
         expect -re $
         send "${SSH_CMD} ${UN}@${host}\n"
         expect {
@@ -1840,7 +1845,7 @@ EOF
                 send "${pw}\r"
                 expect {
                     "${host}:" {
-                        set timeout 600
+                        set timeout ${COLLECT_HOST_TIMEOUT}
                         send "sudo SKIP_MASK=${SKIP_MASK} ${collect_host} ${TARNAME} ${STARTDATE_OPTION} ${STARTDATE} ${STARTTIME} ${ENDDATE_OPTION} ${ENDDATE} ${ENDTIME} ${VERBOSE} ${INVENTORY}\n"
                         expect {
                             "assword:" {
@@ -1972,7 +1977,7 @@ function collect_subcloud_run()
     trap exit {SIGINT SIGTERM}
     log_user ${USER_LOG_MODE}
     spawn bash -i
-    set timeout 30
+    set timeout ${SSH_TIMEOUT}
     expect -re $
     send "${SSH_CMD} ${UN}@${subcloud}\n"
     expect {
@@ -2463,7 +2468,7 @@ function collect_subcloud_clean()
     trap exit {SIGINT SIGTERM}
     log_user ${USER_LOG_MODE}
     spawn bash -i
-    set timeout 30
+    set timeout ${SSH_TIMEOUT}
     expect -re $
     send "${SSH_CMD} ${UN}@${subcloud}\n"
     expect {
@@ -3218,7 +3223,7 @@ echo -n "creating ${COLLECT_TYPE} tarball ${TARBALL_NAME} ... "
     log_user ${USER_LOG_MODE}
     spawn bash -i
     expect -re $
-    set timeout 200
+    set timeout ${CREATE_TARBALL_TIMEOUT}
     send "(cd ${COLLECT_BASE_DIR} ; sudo ${IONICE_CMD} ${NICE_CMD} ${TAR_CMD_APPEND} ${TARBALL_NAME} --remove-files ${COLLECT_NAME}/* 2>>${COLLECT_ERROR_LOG} ; cat ${cmd_done_file})\n"
     expect {
         "assword:" {
diff --git a/tools/collector/debian-scripts/collect_timeouts b/tools/collector/debian-scripts/collect_timeouts
new file mode 100644
index 00000000..5a10e404
--- /dev/null
+++ b/tools/collector/debian-scripts/collect_timeouts
@@ -0,0 +1,27 @@
+#! /bin/bash
+#
+# Copyright (c) 2023 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+##########################################################################################
+
+# Default timeouts for collect ; in seconds ; sourced by the collect tool
+declare -i SCP_TIMEOUT_DEFAULT=600            # seeds SCP_TIMEOUT
+declare -i SSH_TIMEOUT_DEFAULT=60             # seeds SSH_TIMEOUT ; ssh password prompt wait
+declare -i SUDO_TIMEOUT_DEFAULT=60            # seeds SUDO_TIMEOUT ; sudo password prompt wait
+declare -i COLLECT_HOST_TIMEOUT_DEFAULT=600   # seeds COLLECT_HOST_TIMEOUT ; remote collect_host run
+declare -i CREATE_TARBALL_TIMEOUT_DEFAULT=200 # seeds CREATE_TARBALL_TIMEOUT ; final tarball creation
+
+declare -i TIMEOUT_MIN_MINS=10                # minutes
+declare -i TIMEOUT_MAX_MINS=120               # minutes
+declare -i TIMEOUT_DEF_MINS=20                # minutes
+# shellcheck disable=SC2034
+declare -i TIMEOUT_MIN_SECS=$((TIMEOUT_MIN_MINS*60)) # 10 minutes ; derived from MIN, not MAX
+# shellcheck disable=SC2034
+declare -i TIMEOUT_MAX_SECS=$((TIMEOUT_MAX_MINS*60)) # 120 minutes
+declare -i TIMEOUT_DEF_SECS=$((TIMEOUT_DEF_MINS*60)) # 20 minutes
+
+# Overall collect timeout ; in seconds ; starts at the default
+declare -i TIMEOUT=${TIMEOUT_DEF_SECS}
+
diff --git a/tools/collector/debian/deb_folder/rules b/tools/collector/debian/deb_folder/rules
index a06aec28..63da6365 100755
--- a/tools/collector/debian/deb_folder/rules
+++ b/tools/collector/debian/deb_folder/rules
@@ -26,6 +26,7 @@ override_dh_auto_install:
 	install -m 755 -p collect_date $(ROOT)/usr/local/sbin/collect_date
 	install -m 755 -p collect_utils $(ROOT)/usr/local/sbin/collect_utils
 	install -m 755 -p collect_parms $(ROOT)/usr/local/sbin/collect_parms
+	install -m 755 -p collect_timeouts $(SYSCONFDIR)/collect/collect_timeouts
 	install -m 755 -p collect_mask_passwords $(ROOT)/usr/local/sbin/collect_mask_passwords
 	install -m 755 -p expect_done $(ROOT)/usr/local/sbin/expect_done
 	install -m 755 -p mariadb-cli.sh $(ROOT)/usr/local/sbin/mariadb-cli