dde12b075f
Currently, when we call the major-upgrade step we do the following:

"""
...
if [[ -n $(is_bootstrap_node) ]]; then
    check_clean_cluster
fi
...
if [[ -n $(is_bootstrap_node) ]]; then
    migrate_full_to_ng_ha
fi
...
for service in $(services_to_migrate); do
    manage_systemd_service stop "${service%%-clone}"
    ...
done
"""

The problem with the above code is that it is open to the following race
condition:

1. The code runs first on a non-bootstrap controller node, so we start
   stopping a number of services.
2. Pacemaker notices that those services are down and marks them as stopped.
3. The code then runs on the bootstrap node (controller-0), and the
   check_clean_cluster function fails and exits.
4. Eventually the script on the non-bootstrap controller node also times out
   and exits, because the cluster never shut down (the shutdown never
   actually started, since we failed at step 3).

Let's make sure we call the HA NG migration step on its own, as a separate
heat step, and only afterwards start shutting down the systemd services on
all nodes; see the sketch below.

We also need to move the STONITH_STATE variable into a file, because it is
used across two different scripts (1 and 2) and we need to persist that
state.

Co-Authored-By: Athlan-Guyot Sofer <sathlang@redhat.com>
Closes-Bug: #1640407
Change-Id: Ifb9b9e633fcc77604cca2590071656f4b2275c60
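A minimal sketch of the intended split, reusing the helper names quoted
above (the heat step boundaries and the state-file path are assumptions for
illustration, not the exact upstream change):

"""
# Separate heat step: HA NG migration only, gated on the bootstrap node and
# run to completion before any later step touches services.
if [[ -n $(is_bootstrap_node) ]]; then
    check_clean_cluster
    migrate_full_to_ng_ha
fi

# Only in the following heat step do all nodes stop their systemd services.
for service in $(services_to_migrate); do
    manage_systemd_service stop "${service%%-clone}"
done

# STONITH_STATE persisted to a file so that script 2 can read back what
# script 1 recorded (path is hypothetical):
STONITH_STATE_FILE=/var/tmp/stonith_state
echo "$STONITH_STATE" > "$STONITH_STATE_FILE"    # in script 1
STONITH_STATE=$(cat "$STONITH_STATE_FILE")       # in script 2
"""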
110 lines
3.8 KiB
Bash
Executable File
#!/bin/bash

set -eu

check_cluster()
{
    if pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; then
        echo_error "ERROR: upgrade cannot start with some cluster nodes being offline"
        exit 1
    fi
}

check_pcsd()
{
    if pcs status 2>&1 | grep -E 'Offline'; then
        echo_error "ERROR: upgrade cannot start with some pcsd daemon offline"
        exit 1
    fi
}

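# For reference, the two checks above key off lines like these in
# `pcs status` output (illustrative output from a degraded cluster;
# hostnames are hypothetical):
#
#   OFFLINE: [ overcloud-controller-1 ]
#
#   PCSD Status:
#     overcloud-controller-1: Offline
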
mysql_need_update()
{
    # Shall we upgrade the mysql data directory during the stack upgrade?
    if [ "$mariadb_do_major_upgrade" = "auto" ]; then
        ret=$(is_mysql_upgrade_needed)
        if [ "$ret" = "1" ]; then
            DO_MYSQL_UPGRADE=1
        else
            DO_MYSQL_UPGRADE=0
        fi
        echo "mysql upgrade required: $DO_MYSQL_UPGRADE"
    elif [ "$mariadb_do_major_upgrade" = "no" ]; then
        DO_MYSQL_UPGRADE=0
    else
        DO_MYSQL_UPGRADE=1
    fi
}

check_disk_for_mysql_dump()
{
    # Where to back up the current database if mysql needs to be upgraded
    MYSQL_BACKUP_DIR=/var/tmp/mysql_upgrade_osp
    MYSQL_TEMP_UPGRADE_BACKUP_DIR=/var/lib/mysql-temp-upgrade-backup
    # Spare disk ratio for extra safety
    MYSQL_BACKUP_SIZE_RATIO=1.2

    mysql_need_update

    if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
        if [ "$DO_MYSQL_UPGRADE" -eq 1 ]; then

            if [ -d "$MYSQL_BACKUP_DIR" ]; then
                echo_error "Error: $MYSQL_BACKUP_DIR already exists. Likely a previous upgrade failed"
                exit 1
            fi
            # Note: with "set -e" a separate "[ $? -ne 0 ]" check after mkdir
            # would be dead code, so test the mkdir invocation directly.
            if ! mkdir "$MYSQL_BACKUP_DIR"; then
                echo_error "Error: could not create temporary backup directory $MYSQL_BACKUP_DIR"
                exit 1
            fi

            # /root/.my.cnf is needed because we set the mysql root
            # password from liberty onwards
            backup_flags="--defaults-extra-file=/root/.my.cnf -u root --flush-privileges --all-databases --single-transaction"
            # While not ideal, this step allows us to calculate exactly how
            # much space the dump will need. Our main goal here is to avoid
            # any chance of corruption due to disk space exhaustion.
            backup_size=$(mysqldump $backup_flags 2>/dev/null | wc -c)
            database_size=$(du -cb /var/lib/mysql | tail -1 | awk '{ print $1 }')
            free_space=$(df -B1 --output=avail "$MYSQL_BACKUP_DIR" | tail -1)

            # We need at least enough space for a new mysql database plus a
            # dump of the existing one, times a small factor for additional
            # safety room.
            # Note: bash doesn't do floating point math or floats in if
            # statements, so use python to apply the ratio and cast the
            # result back to an integer.
            required_space=$(python -c "from __future__ import print_function; print(\"%d\" % int((($database_size + $backup_size) * $MYSQL_BACKUP_SIZE_RATIO)))")
            if [ "$required_space" -ge "$free_space" ]; then
                echo_error "Error: not enough free space in $MYSQL_BACKUP_DIR ($required_space bytes required)"
                exit 1
            fi
        fi
    fi
}

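# Worked example for the sizing math above (all numbers hypothetical): with
# a 10 GiB datadir and a 2 GiB dump,
#   (10737418240 + 2147483648) * 1.2 = 15461882265 bytes (~14.4 GiB)
# so the check aborts unless the backup filesystem has at least that much
# space available.
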
check_python_rpm()
{
    # If for some reason rpm-python is missing, we want to error out early enough
    if ! rpm -q rpm-python &> /dev/null; then
        echo_error "ERROR: upgrade cannot start without rpm-python installed"
        exit 1
    fi
}

check_clean_cluster()
{
    if pcs status | grep -q Stopped:; then
        echo_error "ERROR: upgrade cannot start with stopped resources on the cluster. Make sure that all the resources are up and running."
        exit 1
    fi
}

check_galera_root_password()
{
    # BZ: 1357112
    if [ ! -e /root/.my.cnf ]; then
        echo_error "ERROR: upgrade cannot be started, the galera password is missing. The overcloud needs an update."
        exit 1
    fi
}
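
# Example usage (hypothetical; the calling step and ordering are
# assumptions): an upgrade step on the bootstrap node could run all of the
# sanity checks defined above before any service is touched:
#
#   check_cluster
#   check_pcsd
#   check_clean_cluster
#   check_galera_root_password
#   check_python_rpm
#   check_disk_for_mysql_dump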