healthcheck_port: run ss -ntp with sudo

The output of "ss" differs depending on whether you run it as root or as the user that runs the process we want to monitor, e.g. nova-conductor (see the reported bug): ()[root@undercloud /]$ ss -ntp | grep -E ":($ports).*,pid=($pids)," (empty output) ()[root@undercloud /]$ sudo -u nova ss -ntp | grep -E ":($ports).*,pid=($pids)," ESTAB 192.168.24.1:56959 192.168.24.3:3306 users:(("nova-conductor",pid=25,fd=7)) ESTAB 192.168.24.1:46860 192.168.24.3:3306 users:(("nova-conductor",pid=26,fd=7)) ESTAB 192.168.24.1:55918 192.168.24.1:5672 users:(("nova-conductor",pid=26,fd=8)) ESTAB 192.168.24.1:56786 192.168.24.1:5672 users:(("nova-conductor",pid=26,fd=9)) ESTAB 192.168.24.1:55920 192.168.24.1:5672 users:(("nova-conductor",pid=25,fd=8)) ESTAB 192.168.24.1:57238 192.168.24.3:3306 users:(("nova-conductor",pid=25,fd=10)) ESTAB 192.168.24.1:56840 192.168.24.1:5672 users:(("nova-conductor",pid=25,fd=9)) ESTAB 192.168.24.1:35115 192.168.24.3:3306 users:(("nova-conductor",pid=26,fd=10)) (output was simplified for the commit message) So the idea of this patch is to introduce a new function, get_user_from_process(), which figures out which user runs the process by using pgrep and ps. More information about how the ps command is used is documented in the code, but to make it safer we grep for the pid AND cmd to get accurate information. Then later, in healthcheck_port, the new function is used to figure out which user is running the process, and "ss" is run with "sudo -u" to get accurate output and know whether the process is actually connected to the port that we want. Change-Id: I7be514832fc7af8dbcfbafe15b2425db8dcfe3c7 Closes-Bug: #1843555
2019-09-11 11:49:41 -04:00 · 2019-09-11 11:49:41 -04:00 · 3283218743
parent a3104d2a14
commit 3283218743
1 changed files with 26 additions and 1 deletions
--- a/healthcheck/common.sh
+++ b/healthcheck/common.sh
@ -4,6 +4,26 @@
 : ${HEALTHCHECK_CURL_WRITE_OUT:='\n%{http_code} %{remote_ip}:%{remote_port} %{time_total} seconds\n'}
 : ${HEALTHCHECK_CURL_OUTPUT:='/dev/null'}
 get_user_from_process() {
    # Print the user that owns a process matching the given pattern.
    #
    # Arguments: $1 - pattern handed to "pgrep -f" (matched against the
    #                 full command line of each process)
    # Outputs:   the USER column of the first "ps" row whose PID is one of
    #            the pids reported by pgrep; nothing if no process matches.
    local process=$1
    local pids

    # pgrep -d '|' joins every matching pid into one string, e.g. "7|25|26".
    pids=$(pgrep -d '|' -f -- "$process")

    # Nothing is running under that name: print nothing, as callers expect
    # an empty string in that case.
    [ -n "$pids" ] || return 0

    # $ ps -eo user,pid,cmd
    # USER         PID CMD
    # nova           1 dumb-init --single-child -- kolla_start
    # nova           7 /usr/bin/python2 /usr/bin/nova-conductor
    # nova          25 /usr/bin/python2 /usr/bin/nova-conductor
    # root        8311 ps -eo user,pid,cmd
    #
    # The PID column is compared for exact equality against the pgrep
    # result. A plain regex match on the whole row would also accept rows
    # where the digits merely appear somewhere (pid 17 for "7", or
    # "python2.7" in the command), which can report the wrong user.
    # Only the first matching row is printed; the rest of the input is still
    # consumed so that "ps" never writes into a closed pipe.
    ps -eo user,pid,cmd | awk -v pids="$pids" '
        BEGIN {
            count = split(pids, list, "|")
            for (i = 1; i <= count; i++) {
                wanted[list[i]] = 1
            }
        }
        !found && ($2 in wanted) {
            print $1
            found = 1
        }
    '
 }
 healthcheck_curl () {
    export NSS_SDB_USE_CACHE=no
    curl -g -k -q -s -S --fail -o "${HEALTHCHECK_CURL_OUTPUT}" \
@ -18,9 +38,14 @@ healthcheck_port () {
    shift 1
    args=$@
    puser=$(get_user_from_process $process)
    ports=${args// /|}
    pids=$(pgrep -d '|' -f $process)
-    ss -ntp | grep -qE ":($ports).*,pid=($pids),"
+    # https://bugs.launchpad.net/tripleo/+bug/1843555
    # "ss" output is different if run as root vs as the user actually running
    # the process. So we verify that the process is connected to the
    # port by using "sudo -u" to get the right output.
    sudo -u $puser ss -ntp | grep -qE ":($ports).*,pid=($pids),"
 }
 healthcheck_listen () {