config/workerconfig/workerconfig/worker_config

363 lines
11 KiB
Bash

#!/bin/bash
#
# Copyright (c) 2013-2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# chkconfig: 2345 80 80
#
### BEGIN INIT INFO
# Provides: worker_config
# Required-Start:
# Required-Stop:
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Worker node config agent
### END INIT INFO
# Pull in platform-wide settings. Variables that are used in this script but
# never assigned here (nodetype, CONFIG_PATH, VOLATILE_PATH, ...) are
# presumably defined by these two files -- TODO confirm.
. /usr/bin/tsconfig
. /etc/platform/platform.conf

# Mount point of the platform filesystem and the config directory inside it
PLATFORM_DIR=/opt/platform
CONFIG_DIR=$CONFIG_PATH

# Flag files recording the outcome of the latest configuration attempt
VOLATILE_CONFIG_PASS="/var/run/.config_pass"
VOLATILE_CONFIG_FAIL="/var/run/.config_fail"

LOGFILE="/var/log/worker_config.log"
IMA_POLICY=/etc/ima.policy

# Copy of /opt/platform required for worker_services
VOLATILE_PLATFORM_PATH=$VOLATILE_PATH/cpe_upgrade_opt_platform

# Retry budget in seconds. On a controller use a larger value to allow
# for active services to recover from a reboot or DOR.
if [ "$nodetype" = "controller" ]; then
    DELAY_SEC=900
else
    DELAY_SEC=600
fi
#######################################
# Report an unrecoverable configuration error and terminate the script.
# Prints the message inside a banner on stdout, creates the failure flag
# file, records the message with logger, pauses 5 seconds and exits.
# Globals:   VOLATILE_CONFIG_FAIL (read) - path of the flag file to create
# Arguments: $1 - error message
# Returns:   never returns; exits the shell with status 1
#######################################
fatal_error()
{
    cat <<EOF
*****************************************************
*****************************************************
$1
*****************************************************
*****************************************************
EOF
    # Quoted so the flag path is never word-split or glob-expanded
    touch "$VOLATILE_CONFIG_FAIL"
    logger "Error: $1"
    # Leave the banner on the console long enough to be read
    echo "Pausing for 5 seconds..."
    sleep 5
    exit 1
}
#######################################
# Resolve a hostname to an IP address.
# The name is looked up in /etc/hosts first. If it is not listed there,
# DNS is polled with dig every 5 seconds until an IPv4- or IPv6-looking
# answer arrives or DELAY_SEC seconds have elapsed.
# Globals:   DELAY_SEC (read) - DNS retry budget in seconds
# Arguments: $1 - hostname to resolve
# Outputs:   the address on stdout; nothing when resolution timed out
# Returns:   status is not meaningful; callers test for empty output
#######################################
get_ip()
{
    local host=$1
    local ipaddr

    # Check /etc/hosts for the hostname
    ipaddr=$(awk -v host="$host" '$2 == host {print $1}' /etc/hosts)
    if [ -n "$ipaddr" ]; then
        # Deliberately unquoted: when several /etc/hosts lines match, the
        # addresses are emitted space-separated on a single line.
        # shellcheck disable=SC2086
        echo $ipaddr
        return
    fi

    local -r ipv4_re='^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'
    local -r ipv6_re='^[0-9a-z]*:[0-9a-z:]*$'
    local start=$SECONDS
    local deadline=$(( SECONDS + DELAY_SEC ))
    local duration

    while [ "$deadline" -ge "$SECONDS" ]; do
        # Because dnsmasq can resolve a hostname to both an IPv4 and an IPv6
        # address in certain situations, and the last address is the IPv6,
        # which would be the management, this is preferred over the IPv4
        # pxeboot address, so take the last address only.
        ipaddr=$(dig +short ANY "$host" | tail -n 1)
        if [[ "$ipaddr" =~ $ipv4_re || "$ipaddr" =~ $ipv6_re ]]; then
            duration=$(( SECONDS - start ))
            logger -t "$0" -p info "DNS query resolved to $ipaddr (took ${duration} secs)"
            echo "$ipaddr"
            return
        fi
        logger -t "$0" -p warn "DNS query failed for $host"
        sleep 5
    done

    duration=$(( SECONDS - start ))
    logger -t "$0" -p warn "DNS query failed after max retries for $host (${duration} secs)"
}
#######################################
# Poll sm-query every 2 seconds until the cloud-services service group
# reports "cloud-services active".
# Globals:   DELAY_SEC (read), OUTPUT (written - last sm-query output)
# Returns:   0 once the group is active, 1 when the time budget is used up
# NOTE(review): bash's SECONDS counts from shell start-up, so the budget is
# measured from script start rather than from entry to this function.
#######################################
wait_for_controller_services()
{
    while :
    do
        # Give up once the time budget has been exhausted
        [ "$SECONDS" -le "$DELAY_SEC" ] || return 1

        # Check to make sure the cloud-services group is enabled
        OUTPUT=$(sm-query service-group cloud-services)
        case "$OUTPUT" in
            "cloud-services active")
                return 0
                ;;
        esac

        # Not running yet; wait a couple of seconds and check again
        sleep 2
    done
}
#######################################
# Configure this node as a worker.
# Sequence: sanity checks and early exits, optional IMA policy load,
# hostname/IP resolution, wait for the active controller, mount the
# platform filesystem, copy certificates and config files, apply the
# puppet manifest, load nbd, mount NFS filesystems, create the pass flag.
# Globals read:    nodetype, subfunction, security_profile, CONFIG_DIR,
#                  PLATFORM_DIR, PLATFORM_SIMPLEX_FLAG, PUPPET_PATH,
#                  DELAY_SEC, IMA_POLICY, LOGFILE, SW_VERSION, http_port,
#                  INSTALL_UUID, VOLATILE_CONFIG_PASS, VOLATILE_CONFIG_FAIL
# Globals written: function, HOST, IPADDR, RC, IMA_LOAD_PATH,
#                  CONTROLLER_UUID, MATE_INSTANCES_DIR, HOST_HIERA
# Returns:   returns normally on success; exits 0 when there is nothing to
#            do; exits 1 (directly or through fatal_error) on failure
#######################################
start()
{
    if [ -f /etc/platform/installation_failed ]; then
        fatal_error "/etc/platform/installation_failed flag is set. Aborting."
    fi

    # Second comma-separated field of $subfunction (the whole string when
    # it contains no comma)
    function=$(echo "$subfunction" | cut -f 2 -d',')

    if [ "$nodetype" != "worker" ] && [ "$function" != "worker" ]; then
        logger -t "$0" -p warn "exiting because this is not worker node"
        exit 0
    fi

    # If we're on a controller, ensure we only run if the controller config is complete
    if [ "$nodetype" = "controller" ] && [ ! -f /etc/platform/.initial_controller_config_complete ]; then
        logger -t "$0" -p warn "exiting because this is controller that has not completed initial config"
        exit 0
    fi

    # Exit in error if called while the fail flag file is present
    if [ -e "$VOLATILE_CONFIG_FAIL" ]; then
        logger -t "$0" -p warn "exiting due to presence of $VOLATILE_CONFIG_FAIL file"
        exit 1
    fi

    # remove previous pass flag file so that if this fails we don't
    # end up with both pass and fail flag files present
    rm -f "$VOLATILE_CONFIG_PASS"

    # Our root differs from init's root (device:inode) only inside a chroot
    if [ "$(stat -c %d:%i /)" != "$(stat -c %d:%i /proc/1/root/.)" ]; then
        # we are in chroot installer environment
        exit 0
    fi

    echo "Configuring worker node..."

    ###### SECURITY PROFILE (EXTENDED) #################
    # If we are in Extended Security Profile mode,     #
    # then before anything else, we need to load the   #
    # IMA Policy so that all configuration operations  #
    # can be measured and appraised                    #
    #                                                  #
    # N.B: Only run for worker nodetype since for AIO  #
    # controllerconfig would have already enabled IMA  #
    # policy                                           #
    #####################################################
    if [ "$nodetype" = "worker" ] && [ "${security_profile}" = "extended" ]; then
        IMA_LOAD_PATH=/sys/kernel/security/ima/policy
        if [ -f "${IMA_LOAD_PATH}" ]; then
            echo "Loading IMA Policy"
            # Best effort operation only, if policy is
            # malformed then audit logs will indicate this,
            # and customer will need to load policy manually
            if ! cat "$IMA_POLICY" > "${IMA_LOAD_PATH}"; then
                logger -t "$0" -p warn "IMA Policy could not be loaded, see audit.log"
            fi
        else
            # the securityfs mount should have been
            # created had the IMA module loaded properly.
            # This is therefore a fatal error
            fatal_error "${IMA_LOAD_PATH} not available. Aborting."
        fi
    fi

    HOST=$(hostname)
    if [ -z "$HOST" ] || [ "$HOST" = "localhost" ]; then
        fatal_error "Host undefined. Unable to perform config"
    fi

    # Start a fresh log file containing only the current timestamp
    date "+%FT%T.%3N" > "$LOGFILE"

    IPADDR=$(get_ip "$HOST")
    if [ -z "$IPADDR" ]; then
        fatal_error "Unable to get IP from host: $HOST"
    fi

    # wait for controller services to be ready if it is an AIO system
    # since ping the loopback interface always returns ok
    if [ -e "${PLATFORM_SIMPLEX_FLAG}" ]; then
        echo "Wait for the controller services"
        if ! wait_for_controller_services; then
            fatal_error "Controller services are not ready"
        fi
    else
        # IPADDR is left unquoted on purpose: get_ip can emit several
        # space-separated addresses and the existing argument splitting
        # is preserved.
        # shellcheck disable=SC2086
        /usr/local/bin/connectivity_test -t "${DELAY_SEC}" -i ${IPADDR} controller-platform-nfs
        if [ $? -ne 0 ]; then
            # 'controller-platform-nfs' is not available from management address
            fatal_error "Unable to contact active controller (controller-platform-nfs) from management address"
        fi
    fi

    # Write the hostname to file so it's persistent
    echo "$HOST" > /etc/hostname

    if ! [ -e "${PLATFORM_SIMPLEX_FLAG}" ]; then
        # Mount the platform filesystem (if necessary - could be auto-mounted by now)
        mkdir -p "$PLATFORM_DIR"
        if [ ! -f "$CONFIG_DIR/hosts" ]; then
            nfs-mount "controller-platform-nfs:$PLATFORM_DIR" "$PLATFORM_DIR" > /dev/null 2>&1
            RC=$?
            if [ $RC -ne 0 ]; then
                fatal_error "Unable to mount $PLATFORM_DIR (RC:$RC)"
            fi
        fi

        # Copy over external_ceph config files
        if [ -e "$CONFIG_DIR/ceph-config" ]; then
            if ! cp "$CONFIG_DIR"/ceph-config/*.conf /etc/ceph/; then
                fatal_error "Unable to copy ceph-external config files"
            fi
        fi
    fi

    if [ "$nodetype" = "worker" ]; then
        # Check whether our installed load matches the active controller
        CONTROLLER_UUID=$(curl -sf "http://controller:${http_port}/feed/rel-${SW_VERSION}/install_uuid")
        if [ $? -ne 0 ]; then
            fatal_error "Unable to retrieve installation uuid from active controller"
        fi

        if [ "$INSTALL_UUID" != "$CONTROLLER_UUID" ]; then
            fatal_error "This node is running a different load than the active controller and must be reinstalled"
        fi

        mkdir -p /etc/docker/certs.d/registry.local:9001/
        chmod 700 /etc/docker/certs.d/registry.local:9001/
        if ! cp "$CONFIG_DIR/registry-cert.crt" /etc/docker/certs.d/registry.local:9001/registry-cert.crt; then
            fatal_error "Unable to copy $CONFIG_DIR/registry-cert.crt to docker dir"
        fi
    fi

    if [ -e "$CONFIG_DIR/registry.central/registry-cert.crt" ]; then
        mkdir -p /etc/docker/certs.d/registry.central:9001/
        chmod 700 /etc/docker/certs.d/registry.central:9001/
        if ! cp "$CONFIG_DIR/registry.central/registry-cert.crt" /etc/docker/certs.d/registry.central:9001/registry-cert.crt; then
            # Report the path that was actually being copied
            fatal_error "Unable to copy $CONFIG_DIR/registry.central/registry-cert.crt to docker dir for central registry"
        fi
    fi

    # Copy over k8s-coredump-handler token
    if [ -e "$CONFIG_DIR/k8s-coredump-conf.json" ]; then
        if ! cp "$CONFIG_DIR/k8s-coredump-conf.json" /etc/k8s-coredump-conf.json; then
            fatal_error "Unable to copy k8s-coredump-handler token config file"
        else
            # The file holds a token: restrict it to its owner
            chmod 600 /etc/k8s-coredump-conf.json
        fi
    fi

    # banner customization always returns 0, success:
    /usr/sbin/install_banner_customization

    if ! cp "$CONFIG_DIR/hosts" /etc/hosts; then
        fatal_error "Unable to copy $CONFIG_DIR/hosts"
    fi

    if [ "$nodetype" = "controller" ] && [ "$HOST" = "controller-1" ]; then
        # In a small system restore, there may be instance data that we want to
        # restore. Copy it and delete it.
        MATE_INSTANCES_DIR="$CONFIG_DIR/controller-1_nova_instances"
        if [ -d "$MATE_INSTANCES_DIR" ]; then
            echo "Restoring instance data from mate controller"
            # Copy the directory contents ("/." also covers an empty
            # directory and dot-files) and discard the saved copy only
            # after the copy succeeded, so a failed restore loses nothing.
            if cp -Rp "$MATE_INSTANCES_DIR"/. /etc/nova/instances/; then
                rm -rf "$MATE_INSTANCES_DIR"
            else
                logger -t "$0" -p warn "Unable to restore instance data from $MATE_INSTANCES_DIR; leaving it in place"
            fi
        fi
    fi

    # Apply the puppet manifest
    HOST_HIERA=${PUPPET_PATH}/hieradata/${HOST}.yaml
    if [ -f "${HOST_HIERA}" ]; then
        echo "$0: Running puppet manifest apply"
        puppet-manifest-apply.sh "${PUPPET_PATH}/hieradata" "${HOST}" worker
        RC=$?
        if [ $RC -ne 0 ]; then
            fatal_error "Failed to run the puppet manifest (RC:$RC)"
        fi
    else
        fatal_error "Host configuration not yet available for this node ($(hostname)=${HOST}); aborting configuration."
    fi

    # Load Network Block Device (failure is reported but not fatal)
    if ! modprobe nbd; then
        echo "WARNING: Unable to load kernel module: nbd."
        logger "WARNING: Unable to load kernel module: nbd."
    fi

    # Run mount command to mount any NFS filesystems that required network access
    /bin/mount -a -t nfs
    RC=$?
    if [ $RC -ne 0 ]; then
        fatal_error "Unable to mount NFS filesystems (RC:$RC)"
    fi

    touch "$VOLATILE_CONFIG_PASS"
}
# Handler for the "stop" action: there is nothing to tear down, so this
# simply hands back the status that was current on entry.
stop()
{
    return $?
}
# Init-script entry point: run the handler for the requested action, or
# print usage and exit 1 for anything else.
if [[ "$1" == "start" ]]; then
    start
elif [[ "$1" == "stop" ]]; then
    stop
else
    echo "Usage: $0 {start|stop}"
    exit 1
fi

exit 0