658df6e2f3
This commit removes the usage of the mgmt_ip in the host table in favor of either the controller FQDN for AIO-SX or the management address configured in the address table. Test Plan: PASS: AIO-SX and AIO-DX virtualbox installation IPv4/IPv6 PASS: Standard virtualbox installation IPv6 PASS: DC virtualbox installation IPv4 ( AIO-SX/DX subclouds ) PASS: AIO-SX and AIO-DX installation IPv4/IPv6 PASS: AIO-DX plus installation IPv6 PASS: DC IPv6 and subcloud AIO-SX PASS: AIO-DX host-swact PASS: DC IPv4 virtualbox with subcloud AIO-DX and AIO-DX PASS: AIO-SX to AIO-DX migration PASS: netstat -tupl ( no services are using the MGMT IP address ) PASS: Ran sanity/regression tests PASS: Backup and Restore for AIO-SX/AIO-DX / DC subcloud AIO-SX PASS: Add and unlock worker node on a deployed standard system Story: 2010722 Task: 48567 Depends-on: https://review.opendev.org/c/starlingx/config/+/886208 Signed-off-by: Teresa Ho <teresa.ho@windriver.com> Change-Id: Id2a79ee291b4f706611ebd8eeceaed31e6ca5aa5
363 lines
11 KiB
Bash
363 lines
11 KiB
Bash
#!/bin/bash
#
# Copyright (c) 2013-2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

#
# chkconfig: 2345 80 80
#

### BEGIN INIT INFO
# Provides: worker_config
# Required-Start:
# Required-Stop:
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Worker node config agent
### END INIT INFO

# Pull in platform-wide settings. Variables used below and later in this
# script but never assigned here (CONFIG_PATH, VOLATILE_PATH, nodetype, ...)
# are presumably provided by these two files -- NOTE(review): confirm which
# file defines which variable.
. /usr/bin/tsconfig
. /etc/platform/platform.conf

PLATFORM_DIR=/opt/platform
CONFIG_DIR=$CONFIG_PATH
# Flag files recording the outcome of the most recent configuration attempt.
VOLATILE_CONFIG_PASS="/var/run/.config_pass"
VOLATILE_CONFIG_FAIL="/var/run/.config_fail"
LOGFILE="/var/log/worker_config.log"
IMA_POLICY=/etc/ima.policy

# Copy of /opt/platform required for worker_services
VOLATILE_PLATFORM_PATH=$VOLATILE_PATH/cpe_upgrade_opt_platform

# Time budget, in seconds, used by the DNS retry loop, the wait for
# controller services and the connectivity test.
DELAY_SEC=600
# If we're on a controller, increase DELAY_SEC to a large value
# to allow for active services to recover from a reboot or DOR
if [ "$nodetype" = "controller" ]; then
    DELAY_SEC=900
fi

# Report an unrecoverable error and abort the script.
#
# Globals:   VOLATILE_CONFIG_FAIL (read) - flag file created to mark the failure
# Arguments: $1 - error message
# Outputs:   banner containing the message and a pause notice on stdout;
#            "Error: <message>" is also sent to logger
# Returns:   does not return; exits with status 1 after a 5 second pause
fatal_error()
{
    cat <<EOF
*****************************************************
*****************************************************
$1
*****************************************************
*****************************************************
EOF
    # Leave a marker so later invocations can see that configuration failed.
    touch "$VOLATILE_CONFIG_FAIL"
    logger "Error: $1"
    echo "Pausing for 5 seconds..."
    sleep 5
    exit 1
}

# Resolve a hostname to an IP address.
#
# /etc/hosts is consulted first. If the host is not listed there, DNS is
# queried with dig every 5 seconds until DELAY_SEC seconds have elapsed.
#
# Globals:   DELAY_SEC (read) - length of the DNS retry window, in seconds
# Arguments: $1 - hostname to resolve
# Outputs:   the resolved address on stdout; nothing if resolution failed
# Returns:   0 when an address was printed, 1 once the retry window expired
get_ip()
{
    local host=$1
    local ipaddr

    # Check /etc/hosts for the hostname
    ipaddr=$(awk -v host="$host" '$2 == host {print $1}' /etc/hosts)
    if [ -n "$ipaddr" ]; then
        # Deliberately unquoted: if several entries match, word splitting
        # flattens them onto a single space-separated line.
        echo $ipaddr
        return 0
    fi

    local -i start=$SECONDS
    local -i deadline=$(( SECONDS + DELAY_SEC ))
    local -i duration
    local ipv4_re='^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*$'
    local ipv6_re='^[0-9a-z]*:[0-9a-z:]*$'

    while [ "$deadline" -ge "$SECONDS" ]; do
        # Because dnsmasq can resolve both a hostname to both an IPv4 and an IPv6
        # address in certain situations, and the last address is the IPv6, which
        # would be the management, this is preferred over the IPv4 pxeboot address,
        # so take the last address only.
        ipaddr=$(dig +short ANY "$host" | tail -n 1)
        if [[ "$ipaddr" =~ $ipv4_re || "$ipaddr" =~ $ipv6_re ]]; then
            duration=$(( SECONDS - start ))
            logger -t "$0" -p info "DNS query resolved to $ipaddr (took ${duration} secs)"
            echo "$ipaddr"
            return 0
        fi
        logger -t "$0" -p warn "DNS query failed for $host"
        sleep 5
    done

    duration=$(( SECONDS - start ))
    logger -t "$0" -p warn "DNS query failed after max retries for $host (${duration} secs)"
    return 1
}

# Poll sm-query every 2 seconds until the cloud-services service group
# reports active, or until DELAY_SEC seconds have passed since this function
# was entered.
#
# Globals:   DELAY_SEC (read) - maximum time to wait, in seconds
# Arguments: none
# Returns:   0 once "cloud-services active" is reported, 1 on timeout
wait_for_controller_services()
{
    # SECONDS counts from shell start-up, so measure the timeout from now;
    # otherwise time already spent earlier in the script would eat into
    # (or entirely consume) the wait.
    local -i deadline=$(( SECONDS + DELAY_SEC ))
    local output

    while [ "$SECONDS" -le "$deadline" ]; do
        # Check to make sure the cloud-services group is enabled
        output=$(sm-query service-group cloud-services)
        if [ "$output" = "cloud-services active" ]; then
            return 0
        fi
        # Not running yet. Let's wait a couple of seconds and check again
        sleep 2
    done
    return 1
}

# Configure this node as a worker.
#
# Exits early (without configuring) when the node has no worker function,
# when a controller has not completed its initial configuration, when a
# previous failure flag is present, or when running inside the chroot
# installer environment. Otherwise it resolves the node address, waits for
# the active controller, copies configuration and certificates from
# CONFIG_DIR, applies the puppet manifest, loads nbd, mounts NFS filesystems
# and finally creates the pass flag file.
#
# Globals read: nodetype, subfunction, security_profile, PLATFORM_SIMPLEX_FLAG,
#               PLATFORM_DIR, CONFIG_DIR, PUPPET_PATH, DELAY_SEC, LOGFILE,
#               IMA_POLICY, http_port, SW_VERSION, INSTALL_UUID,
#               VOLATILE_CONFIG_PASS, VOLATILE_CONFIG_FAIL
# Globals set:  HOST, IPADDR, RC, CONTROLLER_UUID, HOST_HIERA,
#               IMA_LOAD_PATH, MATE_INSTANCES_DIR
# Arguments:    none
# Returns:      returns normally on success; exits 0 on the "nothing to do"
#               paths, exits 1 if the fail flag is present, and calls
#               fatal_error (which exits 1) on any configuration failure.
start()
{
    if [ -f /etc/platform/installation_failed ]; then
        fatal_error "/etc/platform/installation_failed flag is set. Aborting."
    fi

    # Second comma-separated field of $subfunction; cut passes the whole
    # value through unchanged when it contains no comma.
    local subfunc
    subfunc=$(echo "$subfunction" | cut -f 2 -d',')

    if [[ "$nodetype" != "worker" && "$subfunc" != "worker" ]]; then
        logger -t "$0" -p warn "exiting because this is not worker node"
        exit 0
    fi

    # If we're on a controller, ensure we only run if the controller config is complete
    if [[ "$nodetype" == "controller" && ! -f /etc/platform/.initial_controller_config_complete ]]; then
        logger -t "$0" -p warn "exiting because this is controller that has not completed initial config"
        exit 0
    fi

    # Exit in error if called while the fail flag file is present
    if [ -e "$VOLATILE_CONFIG_FAIL" ]; then
        logger -t "$0" -p warn "exiting due to presence of $VOLATILE_CONFIG_FAIL file"
        exit 1
    fi

    # remove previous pass flag file so that if this fails we don't
    # end up with both pass and fail flag files present
    rm -f "$VOLATILE_CONFIG_PASS"

    if [ "$(stat -c %d:%i /)" != "$(stat -c %d:%i /proc/1/root/.)" ]; then
        # we are in chroot installer environment
        exit 0
    fi
    echo "Configuring worker node..."

    ###### SECURITY PROFILE (EXTENDED) #################
    # If we are in Extended Security Profile mode,     #
    # then before anything else, we need to load the   #
    # IMA Policy so that all configuration operations  #
    # can be measured and appraised                    #
    #                                                  #
    # N.B: Only run for worker nodetype since for AIO  #
    # controllerconfig would have already enabled IMA  #
    # policy                                           #
    #####################################################
    if [[ "$nodetype" == "worker" && "${security_profile}" == "extended" ]]; then
        IMA_LOAD_PATH=/sys/kernel/security/ima/policy
        if [ -f "${IMA_LOAD_PATH}" ]; then
            echo "Loading IMA Policy"
            # Best effort operation only, if policy is
            # malformed then audit logs will indicate this,
            # and customer will need to load policy manually
            if ! cat "$IMA_POLICY" > "${IMA_LOAD_PATH}"; then
                logger -t "$0" -p warn "IMA Policy could not be loaded, see audit.log"
            fi
        else
            # the securityfs mount should have been
            # created had the IMA module loaded properly.
            # This is therefore a fatal error
            fatal_error "${IMA_LOAD_PATH} not available. Aborting."
        fi
    fi

    HOST=$(hostname)
    if [[ -z "$HOST" || "$HOST" == "localhost" ]]; then
        fatal_error "Host undefined. Unable to perform config"
    fi

    # Start a fresh log file with a timestamp.
    date "+%FT%T.%3N" > "$LOGFILE"
    IPADDR=$(get_ip "$HOST")
    if [ -z "$IPADDR" ]; then
        fatal_error "Unable to get IP from host: $HOST"
    fi

    # wait for controller services to be ready if it is an AIO system
    # since ping the loopback interface always returns ok
    if [ -e "${PLATFORM_SIMPLEX_FLAG}" ]; then
        echo "Wait for the controller services"
        if ! wait_for_controller_services; then
            fatal_error "Controller services are not ready"
        fi
    else
        if ! /usr/local/bin/connectivity_test -t "${DELAY_SEC}" -i "${IPADDR}" controller-platform-nfs; then
            # 'controller-platform-nfs' is not available from management address
            fatal_error "Unable to contact active controller (controller-platform-nfs) from management address"
        fi
    fi
    # Write the hostname to file so it's persistent
    echo "$HOST" > /etc/hostname

    if ! [ -e "${PLATFORM_SIMPLEX_FLAG}" ]; then
        # Mount the platform filesystem (if necessary - could be auto-mounted by now)
        mkdir -p "$PLATFORM_DIR"
        if [ ! -f "$CONFIG_DIR/hosts" ]; then
            nfs-mount "controller-platform-nfs:$PLATFORM_DIR" "$PLATFORM_DIR" > /dev/null 2>&1
            RC=$?
            if [ $RC -ne 0 ]; then
                fatal_error "Unable to mount $PLATFORM_DIR (RC:$RC)"
            fi
        fi

        # Copy over external_ceph config files
        if [ -e "$CONFIG_DIR/ceph-config" ]; then
            if ! cp "$CONFIG_DIR"/ceph-config/*.conf /etc/ceph/; then
                fatal_error "Unable to copy ceph-external config files"
            fi
        fi
    fi

    if [ "$nodetype" = "worker" ]; then
        # Check whether our installed load matches the active controller
        if ! CONTROLLER_UUID=$(curl -sf "http://controller:${http_port}/feed/rel-${SW_VERSION}/install_uuid"); then
            fatal_error "Unable to retrieve installation uuid from active controller"
        fi

        if [ "$INSTALL_UUID" != "$CONTROLLER_UUID" ]; then
            fatal_error "This node is running a different load than the active controller and must be reinstalled"
        fi

        mkdir -p /etc/docker/certs.d/registry.local:9001/
        chmod 700 /etc/docker/certs.d/registry.local:9001/
        if ! cp "$CONFIG_DIR/registry-cert.crt" /etc/docker/certs.d/registry.local:9001/registry-cert.crt; then
            fatal_error "Unable to copy $CONFIG_DIR/registry-cert.crt to docker dir"
        fi
    fi

    if [ -e "$CONFIG_DIR/registry.central/registry-cert.crt" ]; then
        mkdir -p /etc/docker/certs.d/registry.central:9001/
        chmod 700 /etc/docker/certs.d/registry.central:9001/
        if ! cp "$CONFIG_DIR/registry.central/registry-cert.crt" /etc/docker/certs.d/registry.central:9001/registry-cert.crt; then
            # Report the path that was actually being copied.
            fatal_error "Unable to copy $CONFIG_DIR/registry.central/registry-cert.crt to docker dir for central registry"
        fi
    fi

    # Copy over k8s-coredump-handler token
    if [ -e "$CONFIG_DIR/k8s-coredump-conf.json" ]; then
        if ! cp "$CONFIG_DIR/k8s-coredump-conf.json" /etc/k8s-coredump-conf.json; then
            fatal_error "Unable to copy k8s-coredump-handler token config file"
        else
            chmod 600 /etc/k8s-coredump-conf.json
        fi
    fi

    # banner customization always returns 0, success:
    /usr/sbin/install_banner_customization

    if ! cp "$CONFIG_DIR/hosts" /etc/hosts; then
        fatal_error "Unable to copy $CONFIG_DIR/hosts"
    fi

    if [[ "$nodetype" == "controller" && "$HOST" == "controller-1" ]]; then
        # In a small system restore, there may be instance data that we want to
        # restore. Copy it and delete it.
        MATE_INSTANCES_DIR="$CONFIG_DIR/controller-1_nova_instances"
        if [ -d "$MATE_INSTANCES_DIR" ]; then
            echo "Restoring instance data from mate controller"
            cp -Rp "$MATE_INSTANCES_DIR"/* /etc/nova/instances/
            rm -rf "$MATE_INSTANCES_DIR"
        fi
    fi

    # Apply the puppet manifest
    HOST_HIERA=${PUPPET_PATH}/hieradata/${HOST}.yaml
    if [ -f "${HOST_HIERA}" ]; then
        echo "$0: Running puppet manifest apply"
        puppet-manifest-apply.sh "${PUPPET_PATH}/hieradata" "${HOST}" worker
        RC=$?
        if [ $RC -ne 0 ]; then
            fatal_error "Failed to run the puppet manifest (RC:$RC)"
        fi
    else
        fatal_error "Host configuration not yet available for this node ($(hostname)=${HOST}); aborting configuration."
    fi

    # Load Network Block Device
    if ! modprobe nbd; then
        echo "WARNING: Unable to load kernel module: nbd."
        logger "WARNING: Unable to load kernel module: nbd."
    fi

    #Run mount command to mount any NFS filesystems that required network access
    /bin/mount -a -t nfs
    RC=$?
    if [ $RC -ne 0 ]; then
        fatal_error "Unable to mount NFS filesystems (RC:$RC)"
    fi

    touch "$VOLATILE_CONFIG_PASS"
}

# Handler for the "stop" action. There is nothing to undo, so this always
# succeeds regardless of the status of whatever command ran before it.
stop ()
{
    # Nothing to do
    return 0
}

# Dispatch on the action passed as the first argument. Any action other
# than start or stop prints the usage line and exits with status 1.
case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    *)
        echo "Usage: $0 {start|stop}"
        exit 1
        ;;
esac

exit 0