
Due to a race during the operating system boot process, the neutron server sometimes fails to come up. This patch fixes the problem by restarting the neutron server if it does not reply for too long. Change-Id: Idbb1a360f83abff9374139913eb7677af88e42ed
824 lines
23 KiB
Bash
Executable File
824 lines
23 KiB
Bash
Executable File
#!/usr/bin/env bash

# Stop on any unhandled command failure and on any use of an unset variable.
set -o errexit
set -o nounset

# Absolute path of the directory one level above this script's directory.
TOP_DIR=$(cd "$(dirname "$0")/.." && pwd)

# Configuration and helper libraries. CONFIG_DIR and LIB_DIR are presumably
# set by config/paths -- they are not assigned anywhere in this script.
. "$TOP_DIR/config/paths"
. "$CONFIG_DIR/credentials"
. "$LIB_DIR/functions.guest.sh"
. "$CONFIG_DIR/demo-openstackrc.sh"

# Helpers that are not defined in this file (presumably they come from the
# sourced function library).
exec_logfile

indicate_current_auto

#------------------------------------------------------------------------------
# Launch a demo instance.
#------------------------------------------------------------------------------

# Packets sent by the instance VM to the Internet carry the instance's
# floating IP address as their sender address. For your instance VM to get
# Internet access, you will probably have to configure masquerading on your
# host computer.
#
# On Linux, turning on masquerading may look something like this:
#
#   echo "1" > /proc/sys/net/ipv4/ip_forward
#   modprobe ip_tables
#   modprobe ip_conntrack
#   iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE
#   iptables -A FORWARD -i eth0 -o vboxnet2 -m state \
#            --state RELATED,ESTABLISHED -j ACCEPT
#   iptables -A FORWARD -i vboxnet2 -o eth0 -j ACCEPT

# Set this to true if masquerading is enabled so that instance VMs can reach
# the Internet. An existing non-empty value is kept; the default is true.
: "${MASQUERADING:=true}"

# Set this to true (the default) to make the instance use a Google Public DNS
# name server. With any other value the subnet's DNS setting is cleared
# instead, leaving name resolution to dnsmasq running on a node.
: "${EXT_DNS:=true}"

# Name given to the instance VM that this script launches.
DEMO_INSTANCE_NAME=demo-instance1

# First summary marker in the log.
echo "SUM --- BEGIN"

# Run ssh_no_chk with the osbash private key as identity file.
# Arguments: passed on unchanged (destination followed by the remote command).
function ssh_no_chk_node {
    local -r identity_file="$HOME/.ssh/osbash_key"

    ssh_no_chk -i "$identity_file" "$@"
}

# Run ssh with host key checking switched off.
# Arguments: passed on to ssh unchanged.
# Outputs:   the command line ("ssh ...") on stdout, followed by whatever ssh
#            itself prints.
function ssh_no_chk {
    # Options set to disable strict host key checking and related messages.
    local -a quiet_opts=(
        -o "UserKnownHostsFile /dev/null"
        -o "StrictHostKeyChecking no"
        -o LogLevel=error
    )

    echo "ssh $*"
    ssh "${quiet_opts[@]}" "$@"
}

# Work around neutron client failing with unsupported locale settings: if the
# client's help output consists of exactly that complaint, fall back to the
# C locale for the rest of the script.
# NOTE(review): only stdout is captured here -- confirm that the client prints
# this message on stdout.
if [ "$(neutron --help)" = "unsupported locale setting" ]; then
    echo "Locale not supported on node, setting LC_ALL=C."
    LC_ALL=C
    export LC_ALL
fi

# Poll a service on a node until its status output contains "start/running".
# Arguments: $1 - node to log in to, $2 - service name
# Globals:   SERVICE_RESTARTS (appended to whenever a restart is forced)
# Outputs:   progress dots and status messages on stdout
function wait_for_service {
    local host=$1
    local svc=$2
    local polls=0

    echo -n "Node $host, service $svc:"
    while ! ssh_no_chk_node "$host" service "$svc" status |
            grep -q "start/running"; do
        polls=$((polls + 1))
        # Every 150th unsuccessful poll, force a restart of the service.
        if (( polls % 150 == 0 )); then
            echo " does not seem to come up. Forcing restart."

            echo
            echo "SUM ERROR $svc on node $host not coming up."
            ssh_no_chk_node "$host" sudo service "$svc" restart
            SERVICE_RESTARTS="${SERVICE_RESTARTS:-}$svc@$host "
        fi
        sleep 2
        echo -n .
    done
    echo " up"
}

echo "Running on host: $(hostname)"

# A single ping to the management address of each of the other two nodes;
# a failed ping aborts the script (errexit).
for node in network compute; do
    echo "Checking network connection to $node node."
    ping -c1 "$node-mgmt"
    echo
done

echo "Checking services on network node."
for service in \
        openvswitch-switch \
        neutron-plugin-openvswitch-agent \
        neutron-l3-agent \
        neutron-dhcp-agent \
        neutron-metadata-agent; do
    wait_for_service network-mgmt "$service"
done
echo

echo "Checking services on compute node."
for service in \
        nova-compute \
        openvswitch-switch \
        neutron-plugin-openvswitch-agent; do
    wait_for_service compute-mgmt "$service"
done
echo

# Wait until nova-manage no longer lists nova-compute as XXX.
#
# Returns at once if the service list already shows ":-)". Otherwise polls
# every 5 seconds; whenever the nova-compute service on the compute node is
# not reported as "start/running", it is restarted.
#
# Globals: NOVA_COMPUTE_RESTART (incremented for every forced restart)
# Outputs: progress messages and dots on stdout
# Exits:   with status 1 if nova-compute is running but still listed as XXX
#          once 300 polls have been made
function wait_for_nova_compute {
    if sudo nova-manage service list --service nova-compute |
            grep -q ":-)"; then
        return 0
    fi

    echo " Waiting for nova-compute to switch from XXX to :-)."
    if ssh_no_chk_node compute-mgmt service nova-compute status |
            grep -q "start/running"; then
        echo -n " Service is up, waiting (may take a few minutes)."
    fi

    local cnt=0
    while sudo nova-manage service list --service nova-compute |
            grep -q XXX; do
        cnt=$((cnt + 1))
        sleep 5
        if ssh_no_chk_node compute-mgmt service nova-compute status |
                grep -q "start/running"; then
            # -ge rather than -eq: the service may have been down on exactly
            # the 300th poll, and the limit must still take effect afterwards.
            if [ "$cnt" -ge 300 ]; then
                # This should never happen.
                echo "SUM ABORT nova-compute remains XXX while up."
                echo "Aborting."
                exit 1
            fi
            echo -n .
        else
            echo
            echo "SUM ERROR nova-compute on compute node has died."
            echo "Restarting nova-compute on compute node."
            ssh_no_chk_node compute-mgmt \
                sudo service nova-compute restart
            NOVA_COMPUTE_RESTART=$((${NOVA_COMPUTE_RESTART:-0} + 1))
        fi
    done
    echo
}

# Wait until the nova services are up: first the services on host
# "controller", then nova-compute (appearing in the service list, then
# becoming ready via wait_for_nova_compute).
# Outputs: progress messages, plus a "SUM" line with the seconds spent.
function wait_for_nova_services {
    local t0
    t0=$(date +%s)

    echo "Checking services in sudo nova-manage service list."
    echo -n " Waiting for controller services to switch from XXX to :-)."
    # Ignore nova-compute for now, even if a custom config has it on controller
    while true; do
        if ! sudo nova-manage service list --host controller |
                grep -v nova-compute | grep -q XXX; then
            break
        fi
        sleep 2
        echo -n .
    done
    echo

    if ! sudo nova-manage service list | grep -q nova-compute; then
        echo -n " Waiting for nova-compute to turn up in list."
        while ! sudo nova-manage service list | grep -q nova-compute; do
            sleep 2
            echo -n .
        done
        echo
    fi

    wait_for_nova_compute
    echo
    echo "SUM wait for nova services: $(($(date +%s) - t0))"
}

wait_for_nova_services

# Report forced nova-compute restarts. This has to come after the wait above,
# because that is where NOVA_COMPUTE_RESTART is incremented; checking before
# the call could never report those restarts.
if [ "${NOVA_COMPUTE_RESTART:-0}" -ne 0 ]; then
    echo "SUM ERROR nova-compute restarts: $NOVA_COMPUTE_RESTART"
fi

echo "All services are ready:"
sudo nova-manage service list
echo

# Print the instance list with the current credentials, then the host list
# and the description of host "compute" with the admin credentials.
function show_compute_resource_usage {
    echo "nova list:"
    nova list

    # The admin rc file is sourced inside a subshell, so whatever it sets is
    # gone again once the closing parenthesis is reached.
    (
        . "$CONFIG_DIR/admin-openstackrc.sh"

        echo "As admin user, nova host-list:"
        nova host-list

        echo "As admin user, nova host-describe compute:"
        nova host-describe compute
    )
}

# Wait until "neutron agent-list" (run with admin credentials) no longer
# contains an agent marked " xxx ".
#
# Sorted snapshots of the agent list are kept in $LOG_DIR/test-agent.list;
# lines that are new compared to the previous snapshot are printed as they
# appear.
#
# Globals: LOG_DIR, CONFIG_DIR (read)
# Outputs: progress dots, changed agent lines, the final agent list, and a
#          "SUM" line with the seconds spent waiting
function wait_for_neutron_agents {
    local agent_list=$LOG_DIR/test-agent.list
    local start
    start=$(date +%s)

    echo -n "Waiting for agents in neutron agent-list."
    # Subshell: the admin credentials sourced below do not leak to the caller.
    (
        source "$CONFIG_DIR/admin-openstackrc.sh"
        local out

        neutron agent-list | sort > "$agent_list"
        # grep finding nothing is not an error here.
        out=$(grep " :-) " "$agent_list" || true)
        if [ -n "$out" ]; then
            echo
            echo "$out"
        fi

        while :; do
            neutron agent-list | sort > "$agent_list.new"
            # Lines only present in the fresh snapshot.
            out=$(comm -13 "$agent_list" "$agent_list.new")
            if [ -n "$out" ]; then
                echo
                echo "$out"
            fi
            # Promote the fresh snapshot first, then test it. Testing the
            # previous snapshot cost one extra poll and left the .new file
            # behind.
            mv "$agent_list.new" "$agent_list"
            if ! grep -q " xxx " "$agent_list"; then
                break
            fi
            sleep 1
            echo -n .
        done

        echo
        echo "All agents are ready."
        neutron agent-list
        echo
    )
    echo "SUM wait for neutron agents: $(($(date +%s) - start))"
}

# Block until neutron agent-list no longer shows an agent marked "xxx".
wait_for_neutron_agents
|
|
|
|
# Poll "ip addr" inside a namespace on the network node until an interface
# whose name starts with the given prefix shows up.
# Arguments: $1 - namespace name
#            $2 - interface name prefix (e.g. "qr-")
#            $3 - namespace label used in the progress message
# Outputs:   progress dots, the matched interface name(s), and a "SUM" line
#            with the number of unsuccessful polls
function _wait_for_netns_interface {
    local ns=$1
    local prefix=$2
    local ns_label=$3
    local cnt=0

    echo -n "Waiting for interface ${prefix}* in $ns_label namespace."
    until ssh_no_chk_node network-mgmt \
            sudo ip netns exec "$ns" ip addr |
            grep -Po "(?<=: )${prefix}.*(?=:)"; do
        cnt=$((cnt + 1))
        sleep 1
        echo -n "."
    done
    echo "SUM wait for interface ${prefix}*: $cnt"
}

# Wait for the router and DHCP namespaces on the network node, then for the
# qr-* and qg-* interfaces in the router namespace and the tap* interface in
# the DHCP namespace. neutron-dhcp-agent is restarted once if the DHCP
# namespace has not shown up by the tenth poll.
# Outputs: progress messages and "SUM" lines with the poll counts
function check_namespaces {
    local cnt
    local nsrouter
    local nsdhcp

    echo -n "Getting router namespace."
    cnt=0
    until ssh_no_chk_node network-mgmt ip netns | grep qrouter; do
        cnt=$((cnt + 1))
        sleep 1
        echo -n "."
    done
    echo "SUM wait for router namespace: $cnt"
    # As before, a failing lookup here does not abort the script.
    nsrouter=$(ssh_no_chk_node network-mgmt ip netns | grep qrouter) || true

    echo -n "Getting DHCP namespace."
    cnt=0
    until ssh_no_chk_node network-mgmt ip netns | grep qdhcp; do
        cnt=$((cnt + 1))
        if [ "$cnt" -eq 10 ]; then
            echo
            echo "SUM ERROR No DCHP namespace, restarting neutron-dhcp-agent."
            echo "Restarting neutron-dhcp-agent on network node."
            ssh_no_chk_node network-mgmt \
                sudo service neutron-dhcp-agent restart
        fi
        sleep 1
        echo -n "."
    done
    echo "SUM wait for DHCP namespace: $cnt"
    nsdhcp=$(ssh_no_chk_node network-mgmt ip netns | grep qdhcp) || true

    _wait_for_netns_interface "$nsrouter" "qr-" router
    _wait_for_netns_interface "$nsrouter" "qg-" router
    _wait_for_netns_interface "$nsdhcp" "tap" DHCP
}

check_namespaces

# Create an ssh key pair unless ~/.ssh/id_rsa already exists.
# For training cluster: no password protection on keys to make scripting
# easier
[ -f ~/.ssh/id_rsa ] || {
    echo "Generating an ssh key pair (saved to ~/.ssh/id_rsa*)."
    ssh-keygen -f ~/.ssh/id_rsa -N ""
}

# Check whether a key pair named "demo-key" exists in the OpenStack
# environment and whether its public key equals the key in ~/.ssh/id_rsa.pub.
# A stored key pair that does not match is deleted.
# Outputs: the questions and their yes/no answers on stdout
function check_demo_key {
    # Keep the working variables out of the global namespace.
    local ssh_key
    local stored_key

    echo -n "Checking if 'demo-key' is already in our OpenStack environment: "
    if ! nova keypair-show demo-key >/dev/null 2>&1; then
        echo "no."
        return 0
    fi
    echo "yes."

    echo -n "Checking if the 'demo-key' key pair matches our ssh key: "

    # Second field of the public key file: the key material itself.
    ssh_key=$(< ~/.ssh/id_rsa.pub awk '{print $2}')
    stored_key=$(nova keypair-show demo-key | \
        awk '/^Public key: ssh-rsa/ {print $4}')

    if [ "$ssh_key" != "$stored_key" ]; then
        echo "no."
        echo "Removing the 'demo-key' from the OpenStack envirnoment."
        nova keypair-delete demo-key
    else
        echo "yes."
    fi
}

check_demo_key

# Upload our public key unless a key pair named demo-key exists (a stored key
# that did not match has just been removed by check_demo_key).
nova keypair-show demo-key 2>/dev/null || {
    echo "Adding the public key to our OpenStack environment."
    nova keypair-add --pub-key ~/.ssh/id_rsa.pub demo-key
}

echo "Verifying addition of the public key."
nova keypair-list

echo "Listing available flavors."
nova flavor-list

echo "Listing available images."
nova image-list

# Wait for neutron to start
# (wait_for_neutron is not defined in this file; presumably it comes from the
# sourced function library.)
wait_for_neutron

echo "Listing available networks."
neutron net-list

# Second column of the net-list row that contains " demo-net ".
DEMO_NET_ID=$(neutron net-list | awk '/ demo-net / {print $2}')
echo "ID for demo-net tenant network: $DEMO_NET_ID"

echo "Listing available security groups."
nova secgroup-list

echo "Settings for demo-subnet:"
neutron subnet-show demo-subnet
echo

case "$EXT_DNS" in
    true)
        echo "Setting DNS name server for subnet (passed to booting instance VMs)."
        neutron subnet-update demo-subnet --dns_nameservers list=true 8.8.4.4
        echo
        ;;
    *)
        echo "Clearing DNS name server for subnet (passed to booting instance VMs)."
        neutron subnet-update demo-subnet --dns_nameservers action=clear
        ;;
esac

echo "Settings for demo-subnet:"
neutron subnet-show demo-subnet
echo

# Delete every instance left over from an earlier run: rows of "nova list"
# that contain the demo instance name; the second column is the instance ID.
nova list
nova list | awk ' / '"$DEMO_INSTANCE_NAME"' / {print $2}' |
    while read -r instance; do
        echo "Removing instance $DEMO_INSTANCE_NAME ($instance)."
        nova delete "$instance"
    done

echo -n "Waiting for removed instances to disappear (may take > 1 min)."
while true; do
    if ! nova list | grep -q "$DEMO_INSTANCE_NAME"; then
        break
    fi
    sleep 1
    echo -n .
done
echo

echo "There should be no $DEMO_INSTANCE_NAME instances left:"
nova list

# Log files that are searched for the cause of failed launches.
NOVA_SCHED_LOG=/var/log/upstart/nova-scheduler.log
NOVA_API_LOG=/var/log/upstart/nova-api.log

# Number of instance launch requests made so far.
VM_LAUNCHES=0

# Ask nova to boot the demo instance.
# Globals: NOVA_SCHED_LOG, NOVA_API_LOG, LOG_DIR, CIRROS_URL, DEMO_NET_ID,
#          DEMO_INSTANCE_NAME (read); instance_info (set on first call);
#          VM_LAUNCHES (incremented)
# Outputs: status messages on stdout; the output of "nova boot" is written to
#          the file named by instance_info
function request_instance {
    # Keep a copy of current state of nova-scheduler.log
    sudo cp -vf "$NOVA_SCHED_LOG" "$NOVA_API_LOG" /tmp

    if [ -z "${instance_info:-}" ]; then
        # First call: choose the file and announce it.
        instance_info=$LOG_DIR/test-instance.info
        echo "Instance info: $instance_info"
    else
        # Later calls: discard the info of the previous attempt.
        rm -f "$instance_info"
    fi

    # Image name: last path component of the URL without its "-disk.img".
    local img_name
    img_name=$(basename "$CIRROS_URL" -disk.img)

    local -a boot_args=(
        --flavor m1.tiny
        --image "$img_name"
        --nic net-id="$DEMO_NET_ID"
        --security-group default
        --key-name demo-key
    )

    echo "Requesting an instance."
    nova boot "${boot_args[@]}" "$DEMO_INSTANCE_NAME" > "$instance_info"
    VM_LAUNCHES=$(( VM_LAUNCHES + 1 ))
}

# File that receives the instance's console log (written by save_boot_log).
BOOT_LOG="${LOG_DIR}/test-instance.boot"
echo "Boot log: ${BOOT_LOG}"

# Replace $BOOT_LOG with the current output (stdout and stderr) of
# "nova console-log" for the demo instance.
# Outputs: a message on stderr if nova fails
# Returns: the exit status of nova console-log
function save_boot_log {
    local status=0

    rm -f "$BOOT_LOG"
    nova console-log "$DEMO_INSTANCE_NAME" >"$BOOT_LOG" 2>&1 || status=$?
    if (( status != 0 )); then
        echo >&2 "nova console-log returned error status $status"
    fi
    return "$status"
}

# Print a fixed explanation of why launching an instance can fail right after
# another instance has been deleted.
function explain_instance_failure {
    # Quoted delimiter: the text is printed literally, without expansion.
    cat << 'TXT_INSTANCE_FAILURE'

After deleting an instance, it can take nova up to a minute to realize that
the compute node is free. Under tight space constraints, this becomes a
common source of failure.

As an admin, we could list hosts (including compute hosts):

$ nova host-list

And check resource usage in description of host 'compute':

$ nova host-describe compute

As a regular user, we would have to keep trying for up to a minute and hope
it works soon.

The fastest way to update the database, however, is to restart nova-compute
on the compute node.

TXT_INSTANCE_FAILURE
}

# Look for the cause of an HTTP 409 console error in the nova log lines that
# were added since request_instance saved its copies to /tmp.
# Globals: NOVA_SCHED_LOG, NOVA_API_LOG (read); HTTP_EXCEPTIONS (appended to)
# Returns: 0 if the 409 status went away by itself and the current instance
#          can be used; 1 if a new instance has to be requested
# Exits:   with status 1 if no known cause is found in the logs
function status_409_fixed {
    echo "Checking log files for cause of failure."

    if sudo comm -13 /tmp/nova-scheduler.log "$NOVA_SCHED_LOG" |
            grep "has not been heard from in a while"; then
        echo
        echo "SUM ERROR Missing connection with nova-compute on compute node."
        echo "(Did controller node boot after compute node?)"
        echo
        # Not fixed, need to try with new VM
        return 1
    fi

    if sudo comm -13 /tmp/nova-scheduler.log "$NOVA_SCHED_LOG" |
            grep "Filter RamFilter returned 0 hosts"; then
        echo "SUM ERROR Filter RamFilter returned 0 hosts"
        explain_instance_failure
        show_compute_resource_usage
        # Not fixed, need to try with new VM
        return 1
    fi

    if sudo comm -13 /tmp/nova-api.log "$NOVA_API_LOG" |
            grep "HTTP exception thrown:"; then
        # Just waiting should be enough to fix this
        echo -n "Waiting for HTTP status 409 to cure itself."
        local tries
        for (( tries = 0; tries < 5; tries++ )); do
            if ! console_status_409; then
                HTTP_EXCEPTIONS="${HTTP_EXCEPTIONS:-}$tries "
                echo "okay"
                # We can continue with this instance
                return 0
            fi
            sleep 2
            echo -n .
        done
        HTTP_EXCEPTIONS="${HTTP_EXCEPTIONS:-}${tries}-fail "
        echo "failed"
        # Not fixed, need to try with new VM
        return 1
    fi

    echo "Unknown reason. See for yourself."
    echo "nova-scheduler.log:"
    sudo comm -13 /tmp/nova-scheduler.log "$NOVA_SCHED_LOG"
    echo "nova-api.log:"
    sudo comm -13 /tmp/nova-api.log "$NOVA_API_LOG"
    echo "SUM ABORT Unknown 409 error"
    exit 1
}

# Refresh the boot log and succeed only if fetching it failed and the log
# contains the "is not ready (HTTP 409)" message.
function console_status_409 {
    if save_boot_log 2>/dev/null; then
        return 1
    fi
    grep -q "is not ready (HTTP 409)" "$BOOT_LOG"
}

# Refresh the boot log and succeed only if fetching it failed and the log
# contains the "Unable to get console (HTTP 404)" message.
function console_status_404 {
    if save_boot_log 2>/dev/null; then
        return 1
    fi
    grep -q "Unable to get console (HTTP 404)" "$BOOT_LOG"
}

# Print the sixth whitespace-separated field of every "nova list" row that
# matches the demo instance name.
function instance_status {
    nova list | awk '/'"$DEMO_INSTANCE_NAME"'/ {print $6}'
}

# Succeed if a "nova list" row containing the demo instance name also
# contains the given status string.
# Arguments: $1 - status to look for (e.g. BUILD, ERROR, ACTIVE)
function instance_status_is {
    local wanted=$1

    nova list |
        grep "$DEMO_INSTANCE_NAME" |
        grep -q "$wanted"
}

# Launch instance VMs until one becomes ACTIVE, dealing with the known
# failure modes (console status 409 and 404, instance status ERROR) on the
# way. Gives up once ten or more launch requests have been made without
# success.
while [ : ]; do
    echo "###################################################################"
    echo "Launching an instance VM ($VM_LAUNCHES)."
    request_instance > /dev/null

    if console_status_409; then
        echo "nova console-log returned:"
        cat "$BOOT_LOG"
        echo

        if ! status_409_fixed; then

            echo "Instance build failed."
            echo "Deleting failed instance VM."
            nova delete "$DEMO_INSTANCE_NAME"

            echo "Checking nova-compute on the compute node."
            wait_for_nova_compute

            echo -n "Requesting new instance VMs until it works."
            cnt=0
            while [ : ]; do
                request_instance >/dev/null
                if console_status_409; then
                    nova delete "$DEMO_INSTANCE_NAME"
                    cnt=$((cnt + 1))
                    # After the fifth failure in a row, restart nova-compute.
                    if [ $cnt -eq 5 ]; then
                        echo
                        echo "SUM ERROR console status remains 409."
                        echo "Restarting nova-compute on compute node."
                        ssh_no_chk_node compute-mgmt \
                            sudo service nova-compute restart
                        NOVA_COMPUTE_RESTART=$((${NOVA_COMPUTE_RESTART:-0} + 1))
                    fi
                    sleep 2
                    echo -n .
                else
                    # Either no error or a different error
                    echo
                    break
                fi
            done
        fi
    fi

    if console_status_404; then
        echo "nova console-log returned:"
        cat "$BOOT_LOG"
        echo

        echo -n "Waiting for console."
        # Console status 404 may persist after instance status becomes ERROR.
        while console_status_404 && instance_status_is BUILD; do
            sleep 1
            echo -n .
        done
        echo
        if ! console_status_404; then
            echo "Console status is no longer 404."
        fi

    fi

    echo -n "Waiting for instance to get out of BUILD status."
    while instance_status_is BUILD; do
        sleep 1
        echo -n .
    done
    echo

    if instance_status_is ERROR; then
        echo "Instance VM status: ERROR"

        if sudo comm -13 /tmp/nova-scheduler.log $NOVA_SCHED_LOG |
                grep "Filter RetryFilter returned 0 hosts"; then
            echo "SUM ERROR RetryFilter returned 0 hosts"
            show_compute_resource_usage
            echo "Restarting nova-compute on compute node."
            ssh_no_chk_node compute-mgmt \
                sudo service nova-compute restart
            NOVA_COMPUTE_RESTART=$((${NOVA_COMPUTE_RESTART:-0} + 1))
        fi

        echo "Deleting failed instance VM."
        nova delete "$DEMO_INSTANCE_NAME"
    elif instance_status_is ACTIVE; then
        echo "Instance VM status: ACTIVE."
        break
    fi

    # -ge rather than -eq: the inner retry loop above can raise VM_LAUNCHES
    # by more than one per pass, so the counter may step over 10 and an
    # equality test would then never end the loop.
    if [ "$VM_LAUNCHES" -ge 10 ]; then
        echo "SUM ABORT $VM_LAUNCHES launch attempts failed. Giving up."
        exit 1
    fi
done

# Report HTTP exceptions recorded while launching, if any.
[[ "${HTTP_EXCEPTIONS:-0}" == "0" ]] ||
    echo "SUM ERROR HTTP exceptions: ${HTTP_EXCEPTIONS:-0}"

# From here on, the boot log is re-fetched every two seconds and searched for
# the messages that mark the instance's boot progress.
echo -n "Waiting for DHCP discover."
while ! grep -q "Sending discover..." "$BOOT_LOG"; do
    sleep 2
    echo -n .
    save_boot_log
done
echo

echo -n "Waiting for DHCP success."
while ! grep -q "^Lease of" "$BOOT_LOG"; do
    DHCP_WAIT=$((${DHCP_WAIT:-0} + 1))
    if grep "No lease, failing" "$BOOT_LOG"; then
        echo "SUM ABORT DHCP wait: fail (${DHCP_WAIT:-0})"
        echo "Aborting."
        exit 1
    fi
    sleep 2
    echo -n .
    save_boot_log
done
echo
echo "SUM DHCP wait: ${DHCP_WAIT:-0}"
echo

echo -n "Waiting for metadata success."
while ! grep -q "successful after" "$BOOT_LOG"; do
    if grep "failed to read iid from metadata" "$BOOT_LOG"; then
        echo "SUM ABORT failed to get metadata"
        echo "Aborting."
        exit 1
    fi
    sleep 2
    echo -n .
    save_boot_log
done
echo

echo -n "Waiting for login prompt."
while ! grep -q "$DEMO_INSTANCE_NAME login:" "$BOOT_LOG"; do
    sleep 2
    echo -n .
    save_boot_log
done
echo

echo "Obtaining a VNC session URL for our instance."
nova get-vnc-console "$DEMO_INSTANCE_NAME" novnc

# Adding a rule fails if the rule exists already; that is reported, not fatal.
# rc is reset before each rule so that a failure of the first rule is not
# mistaken for a failure of the second.
echo
echo "Permitting ICMP (ping) to our instances."
rc=0
nova secgroup-add-rule default icmp -1 -1 0.0.0.0/0 2>/dev/null || rc=$?
if [ "$rc" -ne 0 ]; then
    echo "Rule was already there."
fi

echo
echo "Permitting secure shell (SSH) access to our instances."
rc=0
nova secgroup-add-rule default tcp 22 22 0.0.0.0/0 2>/dev/null || rc=$?
if [ "$rc" -ne 0 ]; then
    echo "Rule was already there."
fi

echo
echo "Verifying security-group rules."
nova secgroup-list-rules default

echo
echo "Creating a floating IP address on the ext-net external network."
# Fourth field of the " id " row of the creation output.
floating_ip_id=$(neutron floatingip-create ext-net | awk '/ id / {print $4}')
neutron floatingip-show "$floating_ip_id"

floating_ip=$(neutron floatingip-show "$floating_ip_id" |
    awk '/ floating_ip_address / {print $4}')

echo
echo "Associating the floating IP address with our instance."
nova floating-ip-associate "$DEMO_INSTANCE_NAME" "$floating_ip"

echo
echo "Checking the status of your floating IP address."
nova list

echo
echo -n "Verifying network connectivity to instance VM (may take 2+ min)."
# Since Juno, the floating IP often takes a long time to become pingable.
# Hopefully, this will be fixed, but for the time being we just ping the
# floating IP until we get a reply (or we reach a time limit and give up).
#
# Arguments: $1 - IP address to ping
# Outputs:   one dot per loop pass; on success the ping output and a "SUM"
#            line with the pass count
# Exits:     with status 1 once the counter has passed 600 without a reply
function patient_ping {
    local target=$1
    local elapsed=0

    while true; do
        echo -n .
        sleep 1

        # Ping the instance VM every ten seconds
        if (( elapsed % 10 == 0 )) && ping -c1 "$target" > /dev/null; then
            echo
            # Ping once more, this time showing the output.
            ping -c1 "$target"
            echo "SUM ping instance VM after $elapsed seconds."
            return
        fi

        # Abort if it takes too long
        if (( elapsed > 600 )); then
            echo
            echo "SUM ERROR no ping for instance VM in $elapsed seconds. Aborting."
            exit 1
        fi

        elapsed=$((elapsed + 1))
    done
}

patient_ping "$floating_ip"

# Login used for all commands run inside the instance VM below.
vm_login="cirros@$floating_ip"

echo
echo "Accessing our instance using SSH from the controller node."
ssh_no_chk "$vm_login" uptime

echo
echo "Interface configuration on instance VM."
ssh_no_chk "$vm_login" ip addr

echo
echo "Routing information on instance VM."
ssh_no_chk "$vm_login" /sbin/route -n

echo
echo "/etc/resolv.conf on instance VM."
ssh_no_chk "$vm_login" cat /etc/resolv.conf

echo
echo "Pinging our own floating IP from inside the instance."
ssh_no_chk "$vm_login" ping -c1 "$floating_ip"

echo
echo "Pinging IP address of controller-api."
ssh_no_chk "$vm_login" ping -c1 "$(hostname_to_ip controller-api)"

if [[ "$EXT_DNS" == true ]]; then
    echo "Skipping tests of dnsmasq /etc/hosts."
else
    # Works only with dnsmasq using the node's /etc/hosts
    echo
    echo "Pinging controller-api (test local DNS name resolution)."
    ssh_no_chk "$vm_login" ping -c1 controller-api
    echo
    echo "Pinging network-api."
    ssh_no_chk "$vm_login" ping -c1 network-api
fi

if [[ "$MASQUERADING" == true && "$EXT_DNS" == false ]]; then
    echo
    echo "This may work thanks to masquerading."
    ssh_no_chk "$vm_login" ping -c1 network-mgmt
    echo
    ssh_no_chk "$vm_login" ping -c1 network-data
fi

# With MASQUERADING=true, test Internet access from inside the instance VM:
# ping 8.8.8.8 (up to three attempts), then ping openstack.org to test DNS
# name resolution. Does nothing if MASQUERADING is not true.
# Globals: MASQUERADING, floating_ip (read)
# Outputs: progress messages and a "SUM" line with the number of attempts
# Returns: 0 if skipped or if the ping attempts are exhausted (failing to
#          reach the Internet is reported but not treated as fatal);
#          otherwise the status of the final name resolution ping
function test_internet {
    if [ "$MASQUERADING" != true ]; then
        return 0
    fi

    local ext_ping=1

    echo
    echo "Pinging Google Public DNS name server."
    until ssh_no_chk "cirros@$floating_ip" ping -c1 8.8.8.8; do
        if [ "$ext_ping" -eq 3 ]; then
            echo "Failed. Giving up."
            echo "SUM ERROR ping Internet: failed ($ext_ping)"
            return 0
        fi
        echo
        echo "Trying again in 1 s."
        sleep 1
        ext_ping=$((ext_ping + 1))
    done
    echo "SUM ping Internet: $ext_ping"

    echo
    echo "Testing DNS name resolution within instance VM."
    ssh_no_chk "cirros@$floating_ip" ping -c1 openstack.org
}

test_internet

# Undo the DNS name server setting that was made before launching.
if [[ "$EXT_DNS" == true ]]; then
    echo
    echo "Removing DNS name servers from subnet."
    neutron subnet-update demo-subnet --dns_nameservers action=clear
fi

# Final summary; "-" stands for "no service restarts".
printf '%s\n' \
    "" \
    "Summary" \
    "=======" \
    "SUM service restarts: ${SERVICE_RESTARTS:--}" \
    "SUM instance launches: $VM_LAUNCHES" \
    "SUM END"

printf '%s\n' \
    "" \
    "Try this, it should work:" \
    "Command: 'ssh cirros@$floating_ip' [ password: 'cubswin:)' ]"