integ/utilities/platform-util/scripts/patch-restart-mtce

#!/bin/bash
#
# Copyright (c) 2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
##############################################################################
#
# This script supports no-reboot patching of any single or
# combination of maintenance processes specified on the command line.
#
# Calling sequence:
#
#    patch-restart-mtce [-c|--clean] process1 process2 process3 ...
#    if [ $? != 0 ] ; then
#       restart action failed
#    fi
#
#
###############################################################################
#
# The patching subsystem provides a patch-functions bash source file
# with useful function and variable definitions.
#
if [ -e "/etc/patching/patch-functions" ] ; then
   . /etc/patching/patch-functions
fi

#
# Fallbacks for when patch-functions could not be sourced (or did not
# provide these names). Without them every loginfo call fails with
# 'command not found' and the 'declare -i GLOBAL_RC' below silently
# evaluates an empty value to 0.
#
if [ -z "$(type -t loginfo)" ] ; then
   # Minimal stand-in logger ; writes its arguments to stderr
   loginfo()
   {
      echo "${0##*/}: $*" >&2
   }
fi

# NOTE(review): 0 = ok / 1 = failed is assumed to match the values that
# patch-functions provides ; confirm against that file.
: "${PATCH_STATUS_OK:=0}"
: "${PATCH_STATUS_FAILED:=1}"

loginfo "----------------------------------------------"
loginfo "Maintenance No-Reboot Patching Restart Request"

#
# Declare an overall script return code.
# It starts out as failed and is only changed to ok by the code
# further down once there is nothing left to wait for.
#
declare -i GLOBAL_RC=${PATCH_STATUS_FAILED}

#if [ ! -e $PATCH_FLAGDIR ] ; then
#    mkdir -p $PATCH_FLAGDIR
#fi

# When the -c or --clean option is given the '.restarted' flag file
# of each requested process is removed before restarts are attempted.
declare CLEAN=false

#
# Field positions within a ':' separated process record
#
declare PROCESS_INDEX=0
declare PID_INDEX=1
declare ALIAS_INDEX=2

#
# Completion states ; written into the PID field of a process record
#
declare DISABLED="disabled"
declare NOPID="not-running"
declare SKIPPED="skipped"
declare RESTARTED="restarted"

#
# Executables used to query and restart processes
#
declare SM_QUERY_EXEC="sm-query"
declare SM_RESTART_EXEC="sm-restart-safe"
declare PMON_RESTART_EXEC="pmon-restart"


#
# Process Struct and List       [ name ] [ pid | status ] [ alias (SM managed only) ]
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE TO PATCH WRITERS: Simply list the processes you want no-reboot patch restarted as arguments.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#

# The process restart control structures.
# Each element is a ':' separated record that starts out with a
# placeholder of 0 in the pid | status field :
#    SM managed   ->  <process name>:0:<SM service alias>
#    PMON managed ->  <process name>:0
declare -a sm_managed_processes=()
declare -a pmon_managed_processes=()

#
# Handle a single command line argument.
#
#   $1 - either the -c / --clean option or the name of a maintenance
#        process ; the name of the binary, not the SM alias.
#
# Sets CLEAN=true for the clean option, appends a record to
# sm_managed_processes or pmon_managed_processes for a supported
# process name and only logs anything else as unknown.
#
queue_restart_request()
{
    local process="${1}"

    case "${process}" in

       -c|--clean)
           CLEAN=true
           ;;

       # Maintenance Processes - SM managed
       mtcAgent)
           sm_managed_processes+=("mtcAgent:0:mtc-agent")
           ;;
       guestAgent)
           sm_managed_processes+=("guestAgent:0:guest-agent")
           ;;
       hwmond)
           sm_managed_processes+=("hwmond:0:hw-mon")
           ;;

       # Maintenance Processes - PMON managed
       pmond|guestServer|hbsAgent|mtcClient|hbsClient|rmond)
           pmon_managed_processes+=("${process}:0")
           ;;
       hostwd|fsmond|mtclogd|mtcalarmd|lmond)
           pmon_managed_processes+=("${process}:0")
           ;;

       *)
           loginfo "Unknown process:${process}"
           ;;
    esac
}

# Build the process lists.
# All arguments should be a valid maintenance process name.
# See the case statement above for the supported process names.
while (( ${#} > 0 ))
do
    queue_restart_request "${1}"
    shift
done

# Assume both groupings are done until we know there are not
sm_done=true
pmon_done=true

#
# Restart the SM managed processes listed in sm_managed_processes.
#
# Pass 1 records the current pid of every process (or the NOPID status)
# and, when CLEAN is true, removes the process' '.restarted' flag file.
# Pass 2 asks SM to restart each process whose service is reported as
# 'enabled-active' and that has no '.restarted' flag file yet.
#
# The pid | status field of each record ends up as one of
#    <pid>    - restart requested ; the pid is the one seen before the restart
#    SKIPPED  - '.restarted' flag file already present
#    NOPID    - process not running or service 'is enabling'
#    DISABLED - service in any other state
#
# sm_done is set to false as soon as one restart has been requested.
#
restart_sm_managed_processes()
{
   local record flag_file sm_query_result
   local -a info
   local -i index

   # Pass 1 : Record current process IDs
   index=0
   for record in "${sm_managed_processes[@]}"
   do
      # Split on ':' only ; splitting on whitespace as well would spread a
      # multi pid 'pidof' result over several fields and shift the alias.
      IFS=: read -r -a info <<< "${record}"
      flag_file="${PATCH_FLAGDIR}/${info[${PROCESS_INDEX}]}.restarted"

      if [ "${CLEAN}" = true ] ; then
         rm -f "${flag_file}"
      fi

      info[${PID_INDEX}]=$(pidof "${info[${PROCESS_INDEX}]}")
      if [ -z "${info[${PID_INDEX}]}" ] ; then
         loginfo "${info[${PROCESS_INDEX}]} is not running"
         info[${PID_INDEX}]="${NOPID}"
      fi

      # Save the PID or NOPID status to the process line
      sm_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}:${info[${ALIAS_INDEX}]}"
      index=$((index + 1))
   done

   # Pass 2 : Restart the processes
   index=0
   for record in "${sm_managed_processes[@]}"
   do
      IFS=: read -r -a info <<< "${record}"
      flag_file="${PATCH_FLAGDIR}/${info[${PROCESS_INDEX}]}.restarted"

      if [ -e "${flag_file}" ] ; then
         # Already restarted by an earlier invocation
         info[${PID_INDEX}]="${SKIPPED}"
      else
         sm_query_result=$(${SM_QUERY_EXEC} service "${info[${ALIAS_INDEX}]}")
         if [[ "${sm_query_result}" == *"enabled-active"* ]] ; then
            # Save the original PID
            info[${PID_INDEX}]=$(pidof "${info[${PROCESS_INDEX}]}")

            if [ -n "${info[${PID_INDEX}]}" ] ; then
               loginfo "sm-restart of ${info[${PROCESS_INDEX}]} [pid:${info[${PID_INDEX}]}]"
               touch "${flag_file}"
               ${SM_RESTART_EXEC} service "${info[${ALIAS_INDEX}]}"
               sm_done=false
               # pause before handling the next process
               sleep 5
            else
               loginfo "${info[${PROCESS_INDEX}]} is not running ; must be on inactive controller"
               info[${PID_INDEX}]="${NOPID}"
            fi
         elif [[ "${sm_query_result}" == *"is enabling"* ]] ; then
            info[${PID_INDEX}]="${NOPID}"
            loginfo "sm-restart ${info[${PROCESS_INDEX}]} ; [in progress] ; [pid:${info[${PID_INDEX}]}]"
         else
            info[${PID_INDEX}]="${DISABLED}"
            loginfo "${info[${PROCESS_INDEX}]} is not active"
         fi
      fi

      # Add the PID or status to the process line
      sm_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}:${info[${ALIAS_INDEX}]}"
      index=$((index + 1))
   done
}

# SM managed processes are only handled when is_controller succeeds.
# is_controller has to be run as a command here ; as a bare word inside
# '[ ... -a is_controller ]' it is a non-empty string that is always true.
if [ -n "${sm_managed_processes}" ] && is_controller ; then
   restart_sm_managed_processes
fi

#
# Restart the PMON managed processes listed in pmon_managed_processes.
#
# For each process : remove its '.restarted' flag file when CLEAN is true,
# skip it when the flag file is (still) present, otherwise request a
# restart through PMON_RESTART_EXEC if the process is currently running.
#
# The pid | status field of each record ends up as one of
#    <pid>    - restart requested ; the pid is the one seen before the restart
#    SKIPPED  - '.restarted' flag file already present
#    DISABLED - process was not running
#
# pmon_done is set to false as soon as one restart has been requested.
#
restart_pmon_managed_processes()
{
   local record flag_file
   local -a info
   local -i index=0

   for record in "${pmon_managed_processes[@]}"
   do
      # Split on ':' only so a multi pid 'pidof' result stays in one field
      IFS=: read -r -a info <<< "${record}"
      flag_file="${PATCH_FLAGDIR}/${info[${PROCESS_INDEX}]}.restarted"

      if [ "${CLEAN}" = true ] ; then
         rm -f "${flag_file}"
      fi

      if [ -e "${flag_file}" ] ; then
         # Already restarted by an earlier invocation
         info[${PID_INDEX}]="${SKIPPED}"
      else
         # Save the original PID
         info[${PID_INDEX}]=$(pidof "${info[${PROCESS_INDEX}]}")

         if [ -n "${info[${PID_INDEX}]}" ] ; then
            loginfo "pmon-restart of ${info[${PROCESS_INDEX}]} [pid:${info[${PID_INDEX}]}]"
            touch "${flag_file}"
            ${PMON_RESTART_EXEC} "${info[${PROCESS_INDEX}]}"
            pmon_done=false
            sleep 2

            ####################################################################
            # Special Handling Section
            #
            # - pmond needs 30 seconds to restart before it will start
            #   monitoring processes. We can maybe remove that in the daemon
            #   config file but for now its there and we have to wait.
            ####################################################################
            if [ "${info[${PROCESS_INDEX}]}" == "pmond" ] ; then
               sleep 30
            fi
         else
            info[${PID_INDEX}]="${DISABLED}"
            loginfo "${info[${PROCESS_INDEX}]} is not active"
         fi
      fi

      # Save the updated PID or other status to the process line
      pmon_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}"
      index=$((index + 1))
   done
}

if [ -n "${pmon_managed_processes}" ] ; then

   # Log the complete request list rather than echoing only the
   # first element to stdout.
   loginfo "pmon restart request: ${pmon_managed_processes[*]}"

   restart_pmon_managed_processes
fi

# Both groupings still flagged as done means no restart was requested
# above ; log the final process lists and leave with the ok status.
# Otherwise carry on to the monitoring below, where fail is returned
# if done is not reached within the timeout.
if [[ "${sm_done}" == true && "${pmon_done}" == true ]] ; then
   GLOBAL_RC=${PATCH_STATUS_OK}
   loginfo "  SM Processes: ${sm_managed_processes[@]}"
   loginfo "PMON Processes: ${pmon_managed_processes[@]}"
   loginfo "Maintenance No-Reboot Patching Status: ${GLOBAL_RC} - nothing to do."
   exit ${GLOBAL_RC}
fi

#
# Log a one time summary for one group of processes ; first everything
# that has been 'restarted', then everything that has been 'skipped'.
#
#   $1     - group label used in the log text ; 'sm managed' or 'pmon managed'
#   $2 ... - the process records of that group
#
log_restart_summary()
{
   local group="${1}"
   shift

   local record logged
   local -a info

   logged=false
   for record in "$@"
   do
      IFS=: read -r -a info <<< "${record}"
      if [ "${info[${PID_INDEX}]}" == "${RESTARTED}" ] ; then
         if [ "${logged}" = false ] ; then
            loginfo "The following '${group}' processes have been 'restarted'"
            logged=true
         fi
         loginfo "... process: ${info[${PROCESS_INDEX}]}"
      fi
   done

   logged=false
   for record in "$@"
   do
      IFS=: read -r -a info <<< "${record}"
      if [ "${info[${PID_INDEX}]}" == "${SKIPPED}" ] ; then
         if [ "${logged}" = false ] ; then
            loginfo "The following '${group}' processes have been 'skipped' ; due to previous restart"
            logged=true
         fi
         loginfo "... process: ${info[${PROCESS_INDEX}]}"
      fi
   done
}

#
# One monitoring pass over the SM managed processes.
#
# A process that still carries a pid in its record is marked RESTARTED
# once its service reports 'enabled-active' and pidof returns a pid that
# differs from the recorded one. Sets sm_not_done to true while at least
# one such process is still waiting, false otherwise.
#
monitor_sm_restarts()
{
   local record new_pid
   local -a info
   local -i index=0

   sm_not_done=false
   for record in "${sm_managed_processes[@]}"
   do
      # Split on ':' only so a multi pid value stays in the pid field
      IFS=: read -r -a info <<< "${record}"

      # Don't waste time on processes that were skipped, have already
      # restarted, are disabled or were not running
      case "${info[${PID_INDEX}]}" in
         "${SKIPPED}"|"${RESTARTED}"|"${DISABLED}"|"${NOPID}")
            index=$((index + 1))
            continue
            ;;
      esac

      if [[ "$(${SM_QUERY_EXEC} service "${info[${ALIAS_INDEX}]}")" == *"enabled-active"* ]] ; then
         # Compare the current pid against the one saved before the restart
         if new_pid=$(pidof "${info[${PROCESS_INDEX}]}") && [ -n "${new_pid}" ] ; then
            if [ "${info[${PID_INDEX}]}" != "${new_pid}" ] ; then
               loginfo "${info[${PROCESS_INDEX}]} ${RESTARTED} ok [pid:${info[${PID_INDEX}]} -> ${new_pid}]"
               info[${PID_INDEX}]="${RESTARTED}"
            fi
         fi
      fi

      if [ "${info[${PID_INDEX}]}" != "${RESTARTED}" ] ; then
         sm_not_done=true
      fi

      # Save the pid or status back to the process line
      sm_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}:${info[${ALIAS_INDEX}]}"
      index=$((index + 1))
   done
}

#
# One monitoring pass over the PMON managed processes.
#
# A process that still carries a pid in its record is marked RESTARTED
# once pidof returns a pid that differs from the recorded one ; the same
# rule the SM pass uses. Sets pmon_not_done to true while at least one
# such process is still waiting, false otherwise.
#
monitor_pmon_restarts()
{
   local record new_pid
   local -a info
   local -i index=0

   pmon_not_done=false
   for record in "${pmon_managed_processes[@]}"
   do
      IFS=: read -r -a info <<< "${record}"

      # Don't waste time on processes that were skipped, have already
      # restarted, are disabled or were not running
      case "${info[${PID_INDEX}]}" in
         "${SKIPPED}"|"${RESTARTED}"|"${DISABLED}"|"${NOPID}")
            index=$((index + 1))
            continue
            ;;
      esac

      # set the process as restarted as soon as we have a new pid
      if new_pid=$(pidof "${info[${PROCESS_INDEX}]}") && [ -n "${new_pid}" ] ; then
         if [ "${info[${PID_INDEX}]}" != "${new_pid}" ] ; then
            loginfo "${info[${PROCESS_INDEX}]} ${RESTARTED} ok [PID: ${info[${PID_INDEX}]} -> ${new_pid}]"
            info[${PID_INDEX}]="${RESTARTED}"
         fi
      fi

      # Set not done as long as there is one process not restarted
      if [ "${info[${PID_INDEX}]}" != "${RESTARTED}" ] ; then
         pmon_not_done=true
      fi

      # Save the pid or status back to the process line
      pmon_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}"
      index=$((index + 1))
   done
}

# Monitor the restart of the SM and PMON processes
#
# Don't want to start from the beginning of the shell
# Want time zero now plus the restart timeout.
#
SECONDS=0
TIMEOUT=120
UNTIL=$((SECONDS + TIMEOUT))
loginfo "restart timeout is ${TIMEOUT}"

while [ "${UNTIL}" -ge "${SECONDS}" ]
do
   if [ "${sm_done}" = false ] ; then
      # is_controller / is_cpe have to be run as commands ; as bare words
      # inside '[ ... -o ... ]' they are non-empty strings, always true.
      if is_controller || is_cpe ; then
         monitor_sm_restarts
      fi

      # log when SM restarts are done ; print a summary only once
      if [ "${sm_not_done}" = false ] ; then
         sm_done=true
         log_restart_summary "sm managed" "${sm_managed_processes[@]}"
      fi
   fi

   #########################################################################
   # For all nodes ....
   #########################################################################

   # Loop over all PMON processes looking for complete restarts.
   # The process struct PID field is updated as status is learned.
   if [ "${pmon_done}" = false ] ; then
      monitor_pmon_restarts

      # log when all pmond restarts are done ; print a summary only once
      if [ "${pmon_not_done}" = false ] ; then
         pmon_done=true
         log_restart_summary "pmon managed" "${pmon_managed_processes[@]}"
      fi
   fi

   # check for done. If this is not met in timeout then fail is returned
   if [ "${sm_done}" = true ] && [ "${pmon_done}" = true ] ; then
      GLOBAL_RC=${PATCH_STATUS_OK}
      break
   fi

   sleep 1
done

loginfo "Maintenance No-Reboot Patching Status: ${GLOBAL_RC}"

exit ${GLOBAL_RC}