From 3fd199d8e7fa59979a0aff7d78cce61d6cd607a8 Mon Sep 17 00:00:00 2001
From: Steve Baker
Date: Mon, 21 Sep 2015 13:05:45 +1200
Subject: [PATCH] Make package upgrade pacemaker-aware

This change adds alternative logic for handling package updates on a
pacemaker-managed node.

"yum list updates" is now run and this script exits early if there are
no packages to update.

If the pacemaker service is not running then the previous puppet logic
remains, so a package update is performed which excludes packages
managed by puppet, and a flag is set to indicate that puppet should
perform an ensure=>latest on all packages it manages.

However, if the pacemaker service is running, the following occurs:
- pcs cluster stop is run for this node
- a full yum update is performed
- pcs cluster start is run for this node
- pcs status is run until the hostname for this node appears in the
  Online list

This means that puppet is not involved in the package update process
when the node is managed by pacemaker.

Change-Id: I5ad118552d053dbda280978751167d9fd9da9874
---
 extraconfig/tasks/yum_update.sh | 52 ++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh
index e74c4d8b44..eaeb7ef0d5 100755
--- a/extraconfig/tasks/yum_update.sh
+++ b/extraconfig/tasks/yum_update.sh
@@ -18,15 +18,12 @@ fi
 timestamp_dir=/var/lib/overcloud-yum-update
 mkdir -p $timestamp_dir
 
-command_arguments=${command_arguments:-}
-# exclude upgrading packages that are handled by config management tooling
-for exclude in $(cat /var/lib/tripleo/installed-packages/* | sort -u); do
-    command_arguments="$command_arguments --exclude $exclude"
-done
-
 # sanitise to remove unusual characters
 update_identifier=${update_identifier//[^a-zA-Z0-9-_]/}
 
+# seconds to wait for this node to rejoin the cluster after update
+cluster_start_timeout=360
+
 timestamp_file="$timestamp_dir/$update_identifier"
 if [[ -a "$timestamp_file" ]]; then
     echo "Not running for already-run timestamp \"$update_identifier\""
@@ -34,6 +31,28 @@ if [[ -a "$timestamp_file" ]]; then
 fi
 touch "$timestamp_file"
 
+command_arguments=${command_arguments:-}
+
+list_updates=$(yum list updates)
+
+if [[ "$list_updates" == "" ]]; then
+    echo "No packages require updating"
+    exit 0
+fi
+
+pacemaker_status=$(systemctl is-active pacemaker)
+
+if [[ "$pacemaker_status" == "active" ]] ; then
+    echo "Pacemaker running, stopping cluster node and doing full package update"
+    pcs cluster stop
+else
+    echo "Excluding upgrading packages that are handled by config management tooling"
+    command_arguments="$command_arguments --skip-broken"
+    for exclude in $(cat /var/lib/tripleo/installed-packages/* | sort -u); do
+        command_arguments="$command_arguments --exclude $exclude"
+    done
+fi
+
 command=${command:-update}
 full_command="yum -y $command $command_arguments"
 echo "Running: $full_command"
@@ -43,7 +62,26 @@ return_code=$?
 echo "$result"
 echo "yum return code: $return_code"
 
-echo -n "true" > $heat_outputs_path.update_managed_packages
+if [[ "$pacemaker_status" == "active" ]] ; then
+    echo "Starting cluster node"
+    pcs cluster start
+
+    hostname=$(hostname -s)
+    tstart=$(date +%s)
+    while [[ "$(pcs status | grep "^Online" | grep -F -o $hostname)" == "" ]]; do
+        sleep 5
+        tnow=$(date +%s)
+        if (( tnow-tstart > cluster_start_timeout )) ; then
+            echo "ERROR $hostname failed to join cluster in $cluster_start_timeout seconds"
+            pcs status
+            exit 1
+        fi
+    done
+    pcs status
+
+else
+    echo -n "true" > $heat_outputs_path.update_managed_packages
+fi
 
 echo "Finished yum_update.sh on server $deploy_server_id at `date`"
 