0b4f304be9
In order to avoid conflicts with containerized services binding to standard
HTTP (80) / HTTPS (443) port numbers, the default port numbers are changed to
8080 and 8443. Furthermore, CLI commands are provided to allow binding to
alternate port numbers.

List of changes:
- Add service parameters for HTTP and HTTPS port
- Configure the lighttpd ports via puppet and use port 8008 for platform horizon
- Add http port to platform.conf for the config scripts
- Support helm repo URL update
- Add helm-toolkit plugin for location override
- Override Armada manifest location
- Add installer base URL option to pxeboot-update script
- Add a patching run time class to restart patch-agent when the port config is changed
- Add a semantic check to block port config when a patching operation is in progress or a host is not in unlocked/enabled state

CLI commands for viewing and updating port numbers are:
  system service-parameter-list --service http
  system service-parameter-modify lighttpd port http=8090
  system service-parameter-apply lighttpd

Tests Performed:
Non-containerized deployment installation and sanity
  AIO-DX: Sanity and Nightly automated test suite
  2+2 System: Sanity and Nightly automated test suite
  2+4+6 System: Sanity and Nightly automated test suite
Kubernetes deployment on VBox:
  AIO-SX: application apply and launch instance
  AIO-DX: application apply and launch instance
  2+2 System: application apply and launch instance
HTTP/HTTPS port configuration
Enable/Disable https

Story: 2004642
Task: 28592

Change-Id: I65029e0c15aaf626acb56ab71e7bbde64c7e76a8
Signed-off-by: Tao Liu <tao.liu@windriver.com>
393 lines
13 KiB
Bash
393 lines
13 KiB
Bash
#!/bin/bash
#
# Copyright (c) 2013-2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

#
# chkconfig: 2345 80 80
#

### BEGIN INIT INFO
# Provides: worker_config
# Short-Description: Worker node config agent
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
### END INIT INFO

# Pull in platform-provided settings.
# NOTE(review): the rest of this script reads CONFIG_PATH, VOLATILE_PATH,
# PUPPET_PATH, SW_VERSION, INSTALL_UUID, nodetype, subfunction, http_port and
# similar names without defining them; presumably they come from these two
# files -- confirm which file supplies which.
. /usr/bin/tsconfig
. /etc/platform/platform.conf

PLATFORM_DIR=/opt/platform
CONFIG_DIR=$CONFIG_PATH
# Flag files recording the outcome of the last configuration attempt
VOLATILE_CONFIG_PASS="/var/run/.config_pass"
VOLATILE_CONFIG_FAIL="/var/run/.config_fail"
LOGFILE="/var/log/worker_config.log"
IMA_POLICY=/etc/ima.policy

# Copy of /opt/platform required for worker_services
VOLATILE_PLATFORM_PATH=$VOLATILE_PATH/cpe_upgrade_opt_platform

# Maximum time (seconds) to wait for DNS / controller services
DELAY_SEC=600
# If we're on a controller, increase DELAY_SEC to a large value
# to allow for active services to recover from a reboot or DOR
if [ "$nodetype" = "controller" ]
then
    DELAY_SEC=900
fi

#######################################
# Report an unrecoverable configuration error and terminate the script.
# Prints a banner containing the message on stdout, creates the failure
# flag file, logs the message via logger, pauses 5 seconds and exits 1.
# Globals:   VOLATILE_CONFIG_FAIL (read) - path of the flag file to create
# Arguments: $1 - error message
# Outputs:   banner and "Pausing" notice on stdout
# Returns:   never returns; exits the shell with status 1
#######################################
fatal_error()
{
    cat <<EOF
*****************************************************
*****************************************************
$1
*****************************************************
*****************************************************
EOF
    # Leave a marker so that later invocations refuse to run (see start)
    touch "$VOLATILE_CONFIG_FAIL"
    logger "Error: $1"
    echo "Pausing for 5 seconds..."
    sleep 5
    exit 1
}

#######################################
# Resolve a hostname to an IP address.
# Looks the name up in /etc/hosts first (second column must equal the
# name); if it is not there, polls DNS with dig every 5 seconds until an
# IPv4- or IPv6-looking answer arrives or DELAY_SEC seconds have elapsed.
# Globals:   DELAY_SEC (read) - maximum time to keep retrying DNS
# Arguments: $1 - hostname to resolve
# Outputs:   the address on stdout; nothing when resolution fails
# Returns:   0 when an address was printed, 1 when DNS retries ran out
#######################################
get_ip()
{
    local host=$1
    local ipaddr
    local start_time deadline
    local -r ipv4_re='^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'
    local -r ipv6_re='^[0-9a-z]*:[0-9a-z:]*$'

    # Check /etc/hosts for the hostname
    ipaddr=$(awk -v host="$host" '$2 == host {print $1}' /etc/hosts)
    if [ -n "$ipaddr" ]
    then
        # Deliberately unquoted: several matching lines are emitted as a
        # single space-separated line, as callers have always received.
        echo $ipaddr
        return 0
    fi

    start_time=$SECONDS
    deadline=$(( SECONDS + DELAY_SEC ))
    while [ "$deadline" -ge "$SECONDS" ]
    do
        # dnsmasq can resolve a hostname to both an IPv4 and an IPv6
        # address in certain situations. The last address is the IPv6 one,
        # which would be the management address and is preferred over the
        # IPv4 pxeboot address, so take the last address only.
        ipaddr=$(dig +short ANY "$host" | tail -1)
        if [[ "$ipaddr" =~ $ipv4_re || "$ipaddr" =~ $ipv6_re ]]
        then
            logger -t "$0" -p info "DNS query resolved to $ipaddr (took $(( SECONDS - start_time )) secs)"
            echo "$ipaddr"
            return 0
        fi
        logger -t "$0" -p warn "DNS query failed for $host"
        sleep 5
    done

    logger -t "$0" -p warn "DNS query failed after max retries for $host ($(( SECONDS - start_time )) secs)"
    # Signal the failure explicitly instead of returning logger's status
    return 1
}

#######################################
# Wait until sm-query reports the cloud-services service group as active.
# Polls every 2 seconds for up to DELAY_SEC seconds, measured from the
# moment this function is called (not from script start), so time already
# spent earlier in the script does not shorten or skip the wait.
# Globals:   DELAY_SEC (read) - maximum time to wait, in seconds
# Arguments: none
# Returns:   0 once the group is active, 1 if the wait timed out
#######################################
wait_for_controller_services()
{
    local deadline=$(( SECONDS + DELAY_SEC ))
    local output

    while [ "$SECONDS" -le "$deadline" ]
    do
        # Check to make sure the cloud-services group is enabled
        output=$(sm-query service-group cloud-services)
        if [ "$output" = "cloud-services active" ]
        then
            return 0
        fi
        # Not running yet. Let's wait a couple of seconds and check again
        sleep 2
    done
    return 1
}

#######################################
# Configure this host as a worker node (or as the worker subfunction of a
# combined controller/worker host).
#
# Sequence: early-exit checks, optional IMA policy load, resolve own IP,
# wait for the controller, mount/copy platform configuration, verify the
# installed load against the controller, upgrade checks on controller-1,
# apply the puppet manifest, load nbd, mount NFS filesystems, and finally
# create the pass flag file.
#
# Globals (read): nodetype, subfunction, security_profile, http_port,
#   SW_VERSION, INSTALL_UUID, PUPPET_PATH, VOLATILE_PATH,
#   PLATFORM_SIMPLEX_FLAG, VOLATILE_DISABLE_WORKER_SERVICES, PLATFORM_DIR,
#   CONFIG_DIR, VOLATILE_CONFIG_PASS, VOLATILE_CONFIG_FAIL,
#   VOLATILE_PLATFORM_PATH, LOGFILE, IMA_POLICY, DELAY_SEC
# Arguments: none
# Returns:   returns normally on success; exits 0 when there is nothing to
#            do on this host; exits 1 when the fail flag is already present
#            or (via fatal_error) on any configuration failure
#######################################
start()
{
    local subfunc host ipaddr rc
    local ima_load_path controller_uuid
    local mate_instances_dir etc_platform_mount mate_sw_version host_hiera

    if [ -f /etc/platform/installation_failed ]
    then
        fatal_error "/etc/platform/installation_failed flag is set. Aborting."
    fi

    # Second comma-separated field of $subfunction; cut prints the whole
    # value when it contains no comma.
    subfunc=$(echo "$subfunction" | cut -f 2 -d',')

    if [[ "$nodetype" != "worker" && "$subfunc" != "worker" ]]
    then
        logger -t "$0" -p warn "exiting because this is not worker node"
        exit 0
    fi

    # If we're on a controller, ensure we only run if the controller config is complete
    if [[ "$nodetype" = "controller" && ! -f /etc/platform/.initial_controller_config_complete ]]
    then
        logger -t "$0" -p warn "exiting because this is controller that has not completed initial config"
        exit 0
    fi

    # Exit in error if called while the fail flag file is present
    if [ -e "$VOLATILE_CONFIG_FAIL" ]
    then
        logger -t "$0" -p warn "exiting due to presence of $VOLATILE_CONFIG_FAIL file"
        exit 1
    fi

    # remove previous pass flag file so that if this fails we don't
    # end up with both pass and fail flag files present
    rm -f "$VOLATILE_CONFIG_PASS"

    if [ "$(stat -c %d:%i /)" != "$(stat -c %d:%i /proc/1/root/.)" ]
    then
        # we are in chroot installer environment
        exit 0
    fi
    echo "Configuring worker node..."

    ###### SECURITY PROFILE (EXTENDED) #################
    # If we are in Extended Security Profile mode,     #
    # then before anything else, we need to load the   #
    # IMA Policy so that all configuration operations  #
    # can be measured and appraised                    #
    #                                                  #
    # N.B: Only run for worker nodetype since for AIO  #
    # controllerconfig would have already enabled IMA  #
    # policy                                           #
    #####################################################
    if [[ "$nodetype" = "worker" && "${security_profile}" = "extended" ]]
    then
        ima_load_path=/sys/kernel/security/ima/policy
        if [ -f "$ima_load_path" ]
        then
            echo "Loading IMA Policy"
            # Best effort operation only, if policy is
            # malformed then audit logs will indicate this,
            # and customer will need to load policy manually
            cat "$IMA_POLICY" > "$ima_load_path" \
                || logger -t "$0" -p warn "IMA Policy could not be loaded, see audit.log"
        else
            # the securityfs mount should have been
            # created had the IMA module loaded properly.
            # This is therefore a fatal error
            fatal_error "${ima_load_path} not available. Aborting."
        fi
    fi

    host=$(hostname)
    if [[ -z "$host" || "$host" = "localhost" ]]
    then
        fatal_error "Host undefined. Unable to perform config"
    fi

    # Start a fresh log file containing just the current timestamp
    date "+%FT%T.%3N" > "$LOGFILE"
    ipaddr=$(get_ip "$host")
    if [ -z "$ipaddr" ]
    then
        fatal_error "Unable to get IP from host: $host"
    fi

    # wait for controller services to be ready if it is an AIO system
    # since ping the loopback interface always returns ok
    if [ -e "${PLATFORM_SIMPLEX_FLAG}" ]
    then
        echo "Wait for the controller services"
        if ! wait_for_controller_services
        then
            fatal_error "Controller services are not ready"
        fi
    else
        if ! /usr/local/bin/connectivity_test -t "${DELAY_SEC}" -i "${ipaddr}" controller-platform-nfs
        then
            # 'controller-platform-nfs' is not available from management address
            fatal_error "Unable to contact active controller (controller-platform-nfs) from management address"
        fi
    fi

    # Write the hostname to file so it's persistent
    echo "$host" > /etc/hostname

    if ! [ -e "${PLATFORM_SIMPLEX_FLAG}" ]
    then
        # Mount the platform filesystem (if necessary - could be auto-mounted by now)
        mkdir -p "$PLATFORM_DIR"
        if [ ! -f "$CONFIG_DIR/hosts" ]
        then
            nfs-mount "controller-platform-nfs:$PLATFORM_DIR" "$PLATFORM_DIR" > /dev/null 2>&1
            rc=$?
            if [ "$rc" -ne 0 ]
            then
                fatal_error "Unable to mount $PLATFORM_DIR (RC:$rc)"
            fi
        fi

        # Copy over external_ceph config files
        if [ -e "$CONFIG_DIR/ceph-config" ]
        then
            if ! cp "$CONFIG_DIR"/ceph-config/*.conf /etc/ceph/
            then
                fatal_error "Unable to copy ceph-external config files"
            fi
        fi
    fi

    if [ "$nodetype" = "worker" ]
    then
        # Check whether our installed load matches the active controller
        if ! controller_uuid=$(curl -sf "http://controller:${http_port}/feed/rel-${SW_VERSION}/install_uuid")
        then
            fatal_error "Unable to retrieve installation uuid from active controller"
        fi

        if [ "$INSTALL_UUID" != "$controller_uuid" ]
        then
            fatal_error "This node is running a different load than the active controller and must be reinstalled"
        fi
    fi

    # banner customization always returns 0, success:
    /usr/sbin/install_banner_customization

    if ! cp "$CONFIG_DIR/hosts" /etc/hosts
    then
        fatal_error "Unable to copy $CONFIG_DIR/hosts"
    fi

    if [[ "$nodetype" = "controller" && "$host" = "controller-1" ]]
    then
        # In a small system restore, there may be instance data that we want to
        # restore. Copy it and delete it.
        mate_instances_dir="$CONFIG_DIR/controller-1_nova_instances"
        if [ -d "$mate_instances_dir" ]
        then
            echo "Restoring instance data from mate controller"
            cp -Rp "$mate_instances_dir"/* /etc/nova/instances/
            rm -rf "$mate_instances_dir"
        fi
    fi

    # Upgrade related checks for controller-1 in combined controller/worker
    if [[ "$nodetype" = "controller" && "$host" = "controller-1" ]]
    then
        # Check controller activity.
        # Prior to the final compile of R5 the service check below had been
        # against platform-nfs-ip. However, there was a worker
        # subfunction configuration failure when an AIO-DX system controller
        # booted up while there was no pingable backup controller. Seems the
        # platform-nfs-ip service was not always reaching the enabled-active
        # state when this check was performed under this particular failure.
        # Seems an earlier launched service of like functionality, namely
        # 'platform-export-fs' is reliably enabled at this point there-by
        # resolving the issue.
        sm-query service platform-export-fs | grep enabled-active > /dev/null 2>&1
        if [ $? -ne 0 ]
        then
            # This controller is not active so it is safe to check the version
            # of the mate controller.
            etc_platform_mount=$VOLATILE_PATH/etc_platform
            mkdir -p "$etc_platform_mount"
            if nfs-mount controller-0:/etc/platform "$etc_platform_mount"
            then
                # Check whether software versions match on the two controllers
                mate_sw_version=$(source "$etc_platform_mount/platform.conf" && echo $sw_version)
                if [[ -z "$SW_VERSION" || -z "$mate_sw_version" ]]
                then
                    # An empty operand used to make the unquoted '[' test
                    # error out and silently take this "no mismatch" path;
                    # keep that outcome but make it visible in the log.
                    logger -t "$0" -p warn "Unable to compare software versions (SW_VERSION: '$SW_VERSION' MATE_SW_VERSION: '$mate_sw_version')"
                elif [ "$SW_VERSION" != "$mate_sw_version" ]
                then
                    echo "Controllers are running different software versions"
                    echo "SW_VERSION: $SW_VERSION MATE_SW_VERSION: $mate_sw_version"

                    # Since controller-1 is always upgraded first (and downgraded
                    # last), we know that controller-1 is running a higher release
                    # than controller-0.
                    # This controller is not active and is running a higher
                    # release than the mate controller, so do not launch
                    # any of the worker services (they will not work with
                    # a lower version of the controller services).
                    echo "Disabling worker services until controller activated"
                    touch "$VOLATILE_DISABLE_WORKER_SERVICES"

                    # Copy $PLATFORM_DIR into a temporary location for the worker_services script to
                    # access. This is only required for CPE upgrades
                    rm -rf "$VOLATILE_PLATFORM_PATH"
                    mkdir -p "$VOLATILE_PLATFORM_PATH"
                    cp -Rp "$PLATFORM_DIR"/* "$VOLATILE_PLATFORM_PATH"/
                fi
                umount "$etc_platform_mount"
                rmdir "$etc_platform_mount"
            else
                rmdir "$etc_platform_mount"
                fatal_error "Unable to mount /etc/platform"
            fi
        else
            # Controller-1 (CPE) is active and is rebooting. This is probably a DOR. Since this
            # could happen during an upgrade, we will copy $PLATFORM_DIR into a temporary
            # location for the worker_services script to access in case of a future swact.
            rm -rf "$VOLATILE_PLATFORM_PATH"
            mkdir -p "$VOLATILE_PLATFORM_PATH"
            cp -Rp "$PLATFORM_DIR"/* "$VOLATILE_PLATFORM_PATH"/
        fi
    fi

    # Apply the puppet manifest
    host_hiera=${PUPPET_PATH}/hieradata/${ipaddr}.yaml
    if [ -f "${host_hiera}" ]
    then
        echo "$0: Running puppet manifest apply"
        puppet-manifest-apply.sh "${PUPPET_PATH}/hieradata" "${ipaddr}" worker
        rc=$?
        if [ "$rc" -ne 0 ]
        then
            fatal_error "Failed to run the puppet manifest (RC:$rc)"
        fi
    else
        fatal_error "Host configuration not yet available for this node ($(hostname)=${ipaddr}); aborting configuration."
    fi

    # Load Network Block Device
    if ! modprobe nbd
    then
        echo "WARNING: Unable to load kernel module: nbd."
        logger "WARNING: Unable to load kernel module: nbd."
    fi

    # Run mount command to mount any NFS filesystems that required network access
    /bin/mount -a -t nfs
    rc=$?
    if [ "$rc" -ne 0 ]
    then
        fatal_error "Unable to mount NFS filesystems (RC:$rc)"
    fi

    touch "$VOLATILE_CONFIG_PASS"
}

#######################################
# Stop action of the init script. There is nothing to undo, so this only
# reports success.
# Arguments: none
# Returns:   always 0 (a bare `return` would pass on the status of
#            whatever command ran before the call)
#######################################
stop ()
{
    # Nothing to do
    return 0
}

# Entry point: run the action named by the first argument. Anything other
# than start or stop prints the usage line and exits with status 1.
case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    *)
        echo "Usage: $0 {start|stop}"
        exit 1
        ;;
esac

exit 0