Make package upgrade pacemaker-aware

This change adds alternative logic for handling package updates
on a pacemaker managed node.

"yum list updates" is now run and this script exits early if
there are no packages to update.

If the pacemaker service is not running, the previous puppet logic
remains: a package update is performed which excludes packages
managed by puppet, and a flag is set to indicate that puppet should
perform an ensure=>latest on all packages it manages.

However, if the pacemaker service is running, the following occurs:
- pcs cluster stop is run for this node
- a full yum update is performed
- pcs cluster start is run for this node
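- pcs status is run until the hostname for this node appears in the
  Online list; if that has not happened within cluster_start_timeout
  (360 seconds), the script reports an error and exits non-zero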

This means that puppet is not involved in the package update process when
the node is managed by pacemaker.

Change-Id: I5ad118552d053dbda280978751167d9fd9da9874
This commit is contained in:
Steve Baker 2015-09-21 13:05:45 +12:00
parent 3e879e6faa
commit 3fd199d8e7

View File

@ -18,15 +18,12 @@ fi
timestamp_dir=/var/lib/overcloud-yum-update timestamp_dir=/var/lib/overcloud-yum-update
mkdir -p $timestamp_dir mkdir -p $timestamp_dir
command_arguments=${command_arguments:-}
# exclude upgrading packages that are handled by config management tooling
for exclude in $(cat /var/lib/tripleo/installed-packages/* | sort -u); do
command_arguments="$command_arguments --exclude $exclude"
done
# sanitise to remove unusual characters # sanitise to remove unusual characters
update_identifier=${update_identifier//[^a-zA-Z0-9-_]/} update_identifier=${update_identifier//[^a-zA-Z0-9-_]/}
# seconds to wait for this node to rejoin the cluster after update
cluster_start_timeout=360
timestamp_file="$timestamp_dir/$update_identifier" timestamp_file="$timestamp_dir/$update_identifier"
if [[ -a "$timestamp_file" ]]; then if [[ -a "$timestamp_file" ]]; then
echo "Not running for already-run timestamp \"$update_identifier\"" echo "Not running for already-run timestamp \"$update_identifier\""
@ -34,6 +31,28 @@ if [[ -a "$timestamp_file" ]]; then
fi fi
touch "$timestamp_file" touch "$timestamp_file"
command_arguments=${command_arguments:-}
list_updates=$(yum list updates)
if [[ "$list_updates" == "" ]]; then
echo "No packages require updating"
exit 0
fi
pacemaker_status=$(systemctl is-active pacemaker)
if [[ "$pacemaker_status" == "active" ]] ; then
echo "Pacemaker running, stopping cluster node and doing full package update"
pcs cluster stop
else
echo "Excluding upgrading packages that are handled by config management tooling"
command_arguments="$command_arguments --skip-broken"
for exclude in $(cat /var/lib/tripleo/installed-packages/* | sort -u); do
command_arguments="$command_arguments --exclude $exclude"
done
fi
command=${command:-update} command=${command:-update}
full_command="yum -y $command $command_arguments" full_command="yum -y $command $command_arguments"
echo "Running: $full_command" echo "Running: $full_command"
@ -43,7 +62,26 @@ return_code=$?
echo "$result" echo "$result"
echo "yum return code: $return_code" echo "yum return code: $return_code"
echo -n "true" > $heat_outputs_path.update_managed_packages if [[ "$pacemaker_status" == "active" ]] ; then
echo "Starting cluster node"
pcs cluster start
hostname=$(hostname -s)
tstart=$(date +%s)
while [[ "$(pcs status | grep "^Online" | grep -F -o $hostname)" == "" ]]; do
sleep 5
tnow=$(date +%s)
if (( tnow-tstart > cluster_start_timeout )) ; then
echo "ERROR $hostname failed to join cluster in $cluster_start_timeout seconds"
pcs status
exit 1
fi
done
pcs status
else
echo -n "true" > $heat_outputs_path.update_managed_packages
fi
echo "Finished yum_update.sh on server $deploy_server_id at `date`" echo "Finished yum_update.sh on server $deploy_server_id at `date`"