config/controllerconfig/controllerconfig/scripts/controller_config

567 lines
17 KiB
Bash
Executable File

#!/bin/bash
#
# Copyright (c) 2013-2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# chkconfig: 2345 80 80
#
### BEGIN INIT INFO
# Provides: controller_config
# Short-Description: Controller node config agent
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Required-Start:
# Required-Stop:
### END INIT INFO
# Shared settings are sourced rather than defined here.
# NOTE(review): names used in this script but never assigned in it
# (SW_VERSION, CONFIG_PATH, PUPPET_PATH, VOLATILE_PATH, PLATFORM_SIMPLEX_FLAG,
# UPGRADE_ROLLBACK_FLAG, CONTROLLER_UPGRADE_STARTED_FLAG, http_port,
# security_profile) presumably come from these two files - confirm.
. /usr/bin/tsconfig
. /etc/platform/platform.conf
# Mount point used by mount_platform_dir()/umount_platform_dir().
PLATFORM_DIR=/opt/platform
# Keyring directory for the running software version; start() copies it
# to /tmp before the puppet manifest is applied.
VAULT_DIR=$PLATFORM_DIR/.keyring/${SW_VERSION}/python_keyring
# Directory start() copies certificates, hosts, branding etc. from.
CONFIG_DIR=$CONFIG_PATH
# Run-result flags: start() removes both before configuring and touches the
# pass flag at the end; fatal_error() touches the fail flag.
VOLATILE_CONFIG_PASS="/var/run/.config_pass"
VOLATILE_CONFIG_FAIL="/var/run/.config_fail"
# Marker touched by start() once configuration has finished successfully.
COMPLETED="/etc/platform/.initial_config_complete"
# Marker checked by start(): when present, start() aborts with a
# "Host must be re-installed" error.
INITIAL_MANIFEST_APPLY_FAILED="/etc/platform/.initial_manifest_apply_failed"
# Value passed to connectivity_test via -t (seconds to keep trying).
DELAY_SEC=70
# File-name part of the upgrade-started flag, so the same flag can be
# addressed inside the NFS-mounted copy of /etc/platform.
CONTROLLER_UPGRADE_STARTED_FILE="$(basename ${CONTROLLER_UPGRADE_STARTED_FLAG})"
# Scratch copy of the puppet tree; removed by fatal_error() and by start().
PUPPET_DOWNLOAD=/tmp/puppet.download
# Policy file written to the kernel IMA interface when security_profile
# is "extended".
IMA_POLICY=/etc/ima.policy
#######################################
# Report an unrecoverable configuration error and terminate the script.
#
# Prints the message inside a banner on stdout, records the failure flag
# (only when initial configuration has already completed), logs the message
# when /usr/bin/logger exists, pauses for five seconds, removes the staged
# puppet download directory and exits.
#
# Globals:   COMPLETED, VOLATILE_CONFIG_FAIL, PUPPET_DOWNLOAD (read)
# Arguments: $1 - error message
# Outputs:   banner and pause notice on stdout
# Returns:   never returns; exits with status 1
#######################################
fatal_error()
{
    local banner="*****************************************************"

    printf '%s\n' "$banner" "$banner" "$1" "$banner" "$banner"

    # Don't set the .config_fail flag if the config
    # complete flag is not set first.
    # Quoted so that an empty or whitespace-containing path cannot turn the
    # test into a one-argument (always true) or malformed expression.
    if [ -e "$COMPLETED" ]; then
        touch "$VOLATILE_CONFIG_FAIL"
    fi

    if [ -e /usr/bin/logger ]; then
        logger "Error: $1"
    fi

    echo "Pausing for 5 seconds..."
    sleep 5

    # Never leave the staged puppet tree behind after a failure.
    if [ -d "${PUPPET_DOWNLOAD}" ]; then
        rm -rf "${PUPPET_DOWNLOAD}"
    fi

    exit 1
}
#######################################
# Report an error that stops configuration but sets no failure flag.
#
# Prints the message inside a banner on stdout, logs it when
# /usr/bin/logger exists, pauses for five seconds and exits.
#
# Arguments: $1 - error message
# Outputs:   banner and pause notice on stdout
# Returns:   never returns; exits with status 1
#######################################
exit_error()
{
    local stars="*****************************************************"
    local row

    for row in "$stars" "$stars" "$1" "$stars" "$stars"; do
        printf '%s\n' "$row"
    done

    [ ! -e /usr/bin/logger ] || logger "Exit error: $1"

    echo "Pausing for 5 seconds..."
    sleep 5
    exit 1
}
#######################################
# Resolve a hostname to an IP address.
#
# Looks the name up in /etc/hosts first (second column must equal the
# name); if nothing is found, falls back to a DNS query and accepts the
# answer only when it looks like an IPv4 or IPv6 address.
#
# Arguments: $1 - hostname to resolve
# Outputs:   the address on stdout, or nothing when it cannot be resolved
# Returns:   0
#######################################
get_ip()
{
    local host=$1
    # Declared separately so 'local' does not mask the command's status.
    local ipaddr

    # Check /etc/hosts for the hostname
    ipaddr=$(awk -v host="$host" '$2 == host {print $1}' /etc/hosts)
    if [ -n "$ipaddr" ]; then
        # Intentionally unquoted: when several /etc/hosts lines match, the
        # addresses are emitted on one space-separated line, as before.
        # shellcheck disable=SC2086
        echo $ipaddr
        return
    fi

    # Try the DNS query
    # Because dnsmasq can resolve a hostname to both an IPv4 and an IPv6
    # address in certain situations, and the last address is the IPv6, which
    # would be the management, this is preferred over the IPv4 pxeboot address,
    # so take the last address only.
    ipaddr=$(dig +short ANY "$host" | tail -n 1)

    if [[ "$ipaddr" =~ ^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*$ ]] ||
        [[ "$ipaddr" =~ ^[0-9a-z]*\:[0-9a-z\:]*$ ]]; then
        echo "$ipaddr"
    fi
}
#######################################
# Mount the platform directory.
#
# Without the simplex flag the directory is NFS-mounted from
# controller-platform-nfs. With the simplex flag, drbd is started, the
# drbd-platform resource is made primary and the directory is mounted
# locally; partial setup is torn down again before reporting failure.
#
# Globals:   PLATFORM_SIMPLEX_FLAG, PLATFORM_DIR (read)
# Returns:   0 on success; failures end the script through fatal_error
#######################################
mount_platform_dir()
{
    # No simplex flag: mount over NFS.
    if [ ! -e "${PLATFORM_SIMPLEX_FLAG}" ]; then
        mkdir -p "$PLATFORM_DIR"
        if ! nfs-mount "controller-platform-nfs:$PLATFORM_DIR" "$PLATFORM_DIR"; then
            fatal_error "Unable to mount $PLATFORM_DIR"
        fi
        return
    fi

    # Simplex flag present: bring up drbd and mount locally.
    if ! systemctl start drbd.service; then
        fatal_error "Unable to start drbd.service"
    fi

    # The drbd-platform FS may already be "up", so we won't check for errors
    drbdadm up drbd-platform 2>/dev/null

    if ! drbdadm primary drbd-platform; then
        drbdadm down drbd-platform
        systemctl stop drbd.service
        fatal_error "Failed to make drbd-platform primary"
    fi

    if ! mount "$PLATFORM_DIR"; then
        drbdadm secondary drbd-platform
        drbdadm down drbd-platform
        systemctl stop drbd.service
        fatal_error "Unable to mount $PLATFORM_DIR"
    fi
}
#######################################
# Unmount the platform directory.
#
# With the simplex flag present the drbd-platform resource is also demoted
# and taken down and drbd.service is stopped. No step is checked for errors.
#
# Globals:   PLATFORM_SIMPLEX_FLAG, PLATFORM_DIR (read)
# Returns:   status of the last command run (umount without the simplex
#            flag, "systemctl stop" with it)
#######################################
umount_platform_dir()
{
    if [ ! -e "${PLATFORM_SIMPLEX_FLAG}" ]; then
        umount "$PLATFORM_DIR"
        # Bare return hands back umount's status.
        return
    fi

    umount "$PLATFORM_DIR"
    drbdadm secondary drbd-platform
    drbdadm down drbd-platform
    systemctl stop drbd.service
}
#######################################
# Configure this controller node.
#
# Validates the host (install flag, optional IMA policy load, hostname,
# IP address, previous manifest failure), checks connectivity and load
# match against the active controller when the simplex flag is absent,
# mounts the platform directory, copies certificates and other config
# files into place, runs the upgrade checks, stages the puppet hieradata
# and keyring, applies the puppet manifest and finally records the result
# flags.
#
# Globals read:    security_profile, IMA_POLICY, PLATFORM_SIMPLEX_FLAG,
#                  DELAY_SEC, http_port, SW_VERSION, CONFIG_DIR,
#                  VOLATILE_PATH, UPGRADE_ROLLBACK_FLAG,
#                  CONTROLLER_UPGRADE_STARTED_FILE, PUPPET_PATH, VAULT_DIR,
#                  PUPPET_DOWNLOAD, COMPLETED, INITIAL_MANIFEST_APPLY_FAILED,
#                  VOLATILE_CONFIG_PASS, VOLATILE_CONFIG_FAIL
# Globals written: IMA_LOAD_PATH, HOST, IPADDR, CONTROLLER_UUID,
#                  INSTALL_UUID, VOLATILE_ETC_PLATFORM_MOUNT,
#                  UPGRADE_CONTROLLER, MATE_SW_VERSION, RC; exports
#                  CONTROLLER_SW_VERSIONS_MISMATCH on a version mismatch
# Returns:         status of the final touch on success. Failures end the
#                  script through fatal_error/exit_error; a controller-1
#                  upgrade ends it with upgrade_controller's status.
#######################################
start()
{
    if [ -f /etc/platform/installation_failed ]; then
        fatal_error "/etc/platform/installation_failed flag is set. Aborting."
    fi

    ###### SECURITY PROFILE (EXTENDED) #################
    # If we are in Extended Security Profile mode,     #
    # then before anything else, we need to load the   #
    # IMA Policy so that all configuration operations  #
    # can be measured and appraised                    #
    #####################################################
    if [ "${security_profile}" = "extended" ]; then
        IMA_LOAD_PATH=/sys/kernel/security/ima/policy
        if [ -f "${IMA_LOAD_PATH}" ]; then
            echo "Loading IMA Policy"
            # Best effort operation only, if policy is
            # malformed then audit logs will indicate this,
            # and customer will need to load policy manually
            if ! cat "$IMA_POLICY" > "${IMA_LOAD_PATH}"; then
                logger -t "$0" -p warn "IMA Policy could not be loaded, see audit.log"
            fi
        else
            # the securityfs mount should have been
            # created had the IMA module loaded properly.
            # This is therefore a fatal error
            fatal_error "${IMA_LOAD_PATH} not available. Aborting."
        fi
    fi

    # If hostname is undefined or localhost, something is wrong
    HOST=$(hostname)
    if [ -z "$HOST" ] || [ "$HOST" = "localhost" ]; then
        fatal_error "Host undefined. Unable to perform config"
    fi

    if [ "$HOST" != "controller-0" ] && [ "$HOST" != "controller-1" ]; then
        fatal_error "Invalid hostname for controller node: $HOST"
    fi

    IPADDR=$(get_ip "$HOST")
    if [ -z "$IPADDR" ]; then
        fatal_error "Unable to get IP from host: $HOST"
    fi

    if [ -f "${INITIAL_MANIFEST_APPLY_FAILED}" ]; then
        fatal_error "Initial manifest application failed; Host must be re-installed."
    fi

    echo "Configuring controller node..."

    if [ ! -e "${PLATFORM_SIMPLEX_FLAG}" ]; then
        # try for DELAY_SEC seconds to reach controller-platform-nfs
        if ! /usr/local/bin/connectivity_test -t "${DELAY_SEC}" -i "${IPADDR}" controller-platform-nfs; then
            # 'controller-platform-nfs' is not available, just exit
            exit_error "Unable to contact active controller (controller-platform-nfs). Boot will continue."
        fi

        # Check whether our installed load matches the active controller
        if ! CONTROLLER_UUID=$(curl -sf "http://controller:${http_port}/feed/rel-${SW_VERSION}/install_uuid"); then
            fatal_error "Unable to retrieve installation uuid from active controller"
        fi

        INSTALL_UUID=$(cat "/www/pages/feed/rel-${SW_VERSION}/install_uuid")
        if [ "$INSTALL_UUID" != "$CONTROLLER_UUID" ]; then
            fatal_error "This node is running a different load than the active controller and must be reinstalled"
        fi
    fi

    mount_platform_dir

    # Cleanup from any previous config runs
    if [ -e "$VOLATILE_CONFIG_FAIL" ]; then
        rm -f "$VOLATILE_CONFIG_FAIL"
    fi
    if [ -e "$VOLATILE_CONFIG_PASS" ]; then
        rm -f "$VOLATILE_CONFIG_PASS"
    fi

    if [ -e "$CONFIG_DIR/.license" ]; then
        cp "$CONFIG_DIR/.license" /etc/platform/.license \
            || fatal_error "Unable to copy $CONFIG_DIR/.license"
    fi

    if [ -e "$CONFIG_DIR/server-cert.pem" ]; then
        cp "$CONFIG_DIR/server-cert.pem" /etc/ssl/private/server-cert.pem \
            || fatal_error "Unable to copy $CONFIG_DIR/server-cert.pem"
    fi

    if [ -e "$CONFIG_DIR/registry-cert-pkcs1.key" ]; then
        cp "$CONFIG_DIR/registry-cert-pkcs1.key" /etc/ssl/private/registry-cert-pkcs1.key \
            || fatal_error "Unable to copy $CONFIG_DIR/registry-cert-pkcs1.key"
    fi

    if [ -e "$CONFIG_DIR/registry-cert.key" ]; then
        cp "$CONFIG_DIR/registry-cert.key" /etc/ssl/private/registry-cert.key \
            || fatal_error "Unable to copy $CONFIG_DIR/registry-cert.key"
    fi

    if [ -e "$CONFIG_DIR/registry-cert.crt" ]; then
        cp "$CONFIG_DIR/registry-cert.crt" /etc/ssl/private/registry-cert.crt \
            || fatal_error "Unable to copy $CONFIG_DIR/registry-cert.crt to certificates dir"

        mkdir -p /etc/docker/certs.d/registry.local:9001/
        chmod 700 /etc/docker/certs.d/registry.local:9001/
        cp "$CONFIG_DIR/registry-cert.crt" /etc/docker/certs.d/registry.local:9001/registry-cert.crt \
            || fatal_error "Unable to copy $CONFIG_DIR/registry-cert.crt to docker dir"
    fi

    if [ -e "$CONFIG_DIR/registry.central/registry-cert.crt" ]; then
        mkdir -p /etc/docker/certs.d/registry.central:9001/
        chmod 700 /etc/docker/certs.d/registry.central:9001/
        cp "$CONFIG_DIR/registry.central/registry-cert.crt" /etc/docker/certs.d/registry.central:9001/registry-cert.crt \
            || fatal_error "Unable to copy $CONFIG_DIR/registry-cert.crt to docker dir for central registry"
    fi

    if [ -e "$CONFIG_DIR/admin-ep-cert.pem" ]; then
        cp "$CONFIG_DIR/admin-ep-cert.pem" /etc/ssl/private/ \
            || fatal_error "Unable to copy $CONFIG_DIR/admin-ep-cert.pem to certificates dir"
    fi

    if [ -e "$CONFIG_DIR/dc-adminep-root-ca.crt" ]; then
        cp "$CONFIG_DIR/dc-adminep-root-ca.crt" /etc/pki/ca-trust/source/anchors/ \
            || fatal_error "Unable to copy $CONFIG_DIR/dc-adminep-root-ca.crt to certificates dir"

        # Update system trusted CA cert list with the new CA cert.
        update-ca-trust extract \
            || fatal_error "Unable to update system trusted CA certificate list"
    fi

    if [ -e "$CONFIG_DIR/openstack" ]; then
        if [ ! -e /etc/ssl/private/openstack ]; then
            mkdir -p /etc/ssl/private/openstack
            chmod 755 /etc/ssl/private/openstack
        fi

        cp -p "$CONFIG_DIR"/openstack/* /etc/ssl/private/openstack \
            || fatal_error "Unable to copy openstack certificate files"
    fi

    # Copy over external_ceph config files
    if [ -e "$CONFIG_DIR/ceph-config" ]; then
        cp "$CONFIG_DIR"/ceph-config/*.conf /etc/ceph/ \
            || fatal_error "Unable to copy ceph-external config files"
    fi

    # Copy over kube api server encryption provider config
    if [ -e "$CONFIG_DIR/kubernetes/encryption-provider.yaml" ]; then
        cp "$CONFIG_DIR/kubernetes/encryption-provider.yaml" /etc/kubernetes/ \
            || fatal_error "Unable to copy kube api server encryption provider config file"
        chmod 600 /etc/kubernetes/encryption-provider.yaml
    fi

    # Keep the /opt/branding directory to preserve any new files
    rm -rf /opt/branding/*.tgz
    # Branding archives are optional, so a failed copy is deliberately ignored.
    cp "$CONFIG_DIR"/branding/*.tgz /opt/branding 2>/dev/null

    # banner customization always returns 0, success:
    /usr/sbin/install_banner_customization

    cp "$CONFIG_DIR/hosts" /etc/hosts \
        || fatal_error "Unable to copy $CONFIG_DIR/hosts"

    if ! hostname > /etc/hostname; then
        fatal_error "Unable to write /etc/hostname"
    fi

    # Our PXE config files are located in the config directory. Create a
    # symbolic link if it is not already created.
    if [ ! -L /pxeboot/pxelinux.cfg ]; then
        ln -sf "$CONFIG_DIR/pxelinux.cfg" /pxeboot/pxelinux.cfg
    fi

    # Upgrade related checks
    if [ ! -e "${PLATFORM_SIMPLEX_FLAG}" ]; then
        VOLATILE_ETC_PLATFORM_MOUNT=$VOLATILE_PATH/etc_platform
        # -p: tolerate a mount point left over from an earlier run.
        mkdir -p "$VOLATILE_ETC_PLATFORM_MOUNT"

        if nfs-mount controller-platform-nfs:/etc/platform "$VOLATILE_ETC_PLATFORM_MOUNT"; then
            # Generate Rollback flag if necessary
            if [ -f "$VOLATILE_ETC_PLATFORM_MOUNT/.upgrade_rollback" ]; then
                touch "$UPGRADE_ROLLBACK_FLAG"
            fi

            # Check whether we are upgrading controller-1.
            # NOTE(review): the two exit_error paths below leave both the
            # /etc/platform NFS mount and the platform directory mounted -
            # confirm whether that is intended.
            UPGRADE_CONTROLLER=0
            if [ -f "$VOLATILE_ETC_PLATFORM_MOUNT/.upgrade_controller_1" ]; then
                if [ -f "$VOLATILE_ETC_PLATFORM_MOUNT/.upgrade_controller_1_fail" ]; then
                    exit_error "Controller-1 upgrade previously failed. Upgrade must be aborted."
                fi

                if [ -f "$VOLATILE_ETC_PLATFORM_MOUNT/$CONTROLLER_UPGRADE_STARTED_FILE" ]; then
                    touch "$VOLATILE_ETC_PLATFORM_MOUNT/.upgrade_controller_1_fail"
                    exit_error "Controller-1 data migration already in progress. Upgrade must be aborted"
                fi

                touch "$VOLATILE_ETC_PLATFORM_MOUNT/$CONTROLLER_UPGRADE_STARTED_FILE"
                UPGRADE_CONTROLLER=1
            fi

            # Check whether software versions match on the two controllers
            MATE_SW_VERSION=$(grep sw_version "$VOLATILE_ETC_PLATFORM_MOUNT/platform.conf" | awk -F= '{print $2}')
            # Quoted so that an empty mate version is reported as a mismatch
            # instead of making the test itself fail.
            if [ "$SW_VERSION" != "$MATE_SW_VERSION" ]; then
                echo "Controllers are running different software versions"
                echo "SW_VERSION: $SW_VERSION MATE_SW_VERSION: $MATE_SW_VERSION"
                # This environment variable allows puppet manifests to behave
                # differently when the controller software versions do not match.
                export CONTROLLER_SW_VERSIONS_MISMATCH=true
            fi

            umount "$VOLATILE_ETC_PLATFORM_MOUNT"
            rmdir "$VOLATILE_ETC_PLATFORM_MOUNT"

            if [ "$UPGRADE_CONTROLLER" -eq 1 ]; then
                umount_platform_dir
                echo "Upgrading controller-1. This will take some time..."
                /usr/bin/upgrade_controller "$MATE_SW_VERSION" "$SW_VERSION"
                exit $?
            fi
        else
            umount_platform_dir
            rmdir "$VOLATILE_ETC_PLATFORM_MOUNT"
            fatal_error "Unable to mount /etc/platform"
        fi
    fi

    mkdir -p /etc/postgresql/
    cp -p "$CONFIG_DIR"/postgresql/*.conf /etc/postgresql/ \
        || fatal_error "Unable to copy .conf files to /etc/postgresql"

    # Copy the hieradata and the staging secured vault
    rm -rf "${PUPPET_DOWNLOAD}"
    if ! cp -R "$PUPPET_PATH" "${PUPPET_DOWNLOAD}"; then
        umount_platform_dir
        fatal_error "Failed to copy puppet directory $PUPPET_PATH"
    fi

    if ! cp -RL "$VAULT_DIR" /tmp; then
        umount_platform_dir
        # Do not leave a partially copied keyring behind.
        rm -rf /tmp/python_keyring
        fatal_error "Failed to copy vault directory $VAULT_DIR"
    fi

    # Unmount
    umount_platform_dir

    # Apply the puppet manifest
    HOST_HIERA=${PUPPET_DOWNLOAD}/hieradata/${IPADDR}.yaml
    if [ -f "${HOST_HIERA}" ]; then
        echo "$0: Running puppet manifest apply"
        puppet-manifest-apply.sh "${PUPPET_DOWNLOAD}/hieradata" "${IPADDR}" controller
        RC=$?
        if [ "$RC" -ne 0 ]; then
            # fatal_error only removes the puppet download; the staged
            # keyring has to be removed here.
            rm -rf /tmp/python_keyring

            # An unconditional fatal_error used to sit here, which made the
            # branch below unreachable and the flag was never written.
            if [ ! -f "${COMPLETED}" ]; then
                # The initial manifest application failed. We need to remember
                # this so we don't attempt to reapply them after a reboot.
                # Many of our manifests do not support being run more than
                # once with the $COMPLETED flag unset.
                touch "$INITIAL_MANIFEST_APPLY_FAILED"
                fatal_error "Failed to run the puppet manifest (RC:$RC); Host must be re-installed."
            else
                fatal_error "Failed to run the puppet manifest (RC:$RC)"
            fi
        fi
    else
        rm -rf /tmp/python_keyring
        fatal_error "Host configuration not yet available for this node ($(hostname)=${IPADDR}); aborting configuration."
    fi

    # Cleanup ${PUPPET_DOWNLOAD} and the secured vault
    rm -rf "${PUPPET_DOWNLOAD}"
    rm -rf /tmp/python_keyring

    if [ ! -e "${PLATFORM_SIMPLEX_FLAG}" ]; then
        # The second controller is now configured - remove the simplex flag on
        # the mate controller.
        mkdir -p /tmp/mateflag
        if nfs-mount controller-platform-nfs:/etc/platform /tmp/mateflag; then
            rm -f /tmp/mateflag/simplex
            umount /tmp/mateflag
        else
            echo "Unable to mount /etc/platform"
        fi
        # rmdir only removes an empty directory, so it is safe on both paths.
        rmdir /tmp/mateflag
    fi

    touch "$COMPLETED"
    touch "$VOLATILE_CONFIG_PASS"
}
#######################################
# Handler for the "stop" action: this service has nothing to shut down.
# Outputs:   none
#######################################
stop()
{
    # Nothing to do
    return
}
# Dispatch on the requested init action; anything other than start or stop
# prints the usage line and exits with status 1.
if [ "$1" = "start" ]; then
    start
elif [ "$1" = "stop" ]; then
    stop
else
    echo "Usage: $0 {start|stop}"
    exit 1
fi

exit 0