From 8327011f91fc8a0a2cbfe8a697d5ff958eb7c011 Mon Sep 17 00:00:00 2001
From: Matt Riedemann
Date: Wed, 12 Sep 2018 17:35:22 -0400
Subject: [PATCH] Add post-test hook for testing evacuate

This adds a post-test bash script to test evacuate
in a multinode job.

This performs two tests:

1. A negative test where we inject a fault by stopping
   libvirt prior to the evacuation and wait for the
   server to go to ERROR status.

2. A positive test where we restart libvirt, wait for the
   compute service to be enabled and then evacuate
   the server and wait for it to be ACTIVE.

For now we hack this into the nova-live-migration
job, but it should probably live in a different job
long-term.

Change-Id: I9b7c9ad6b0ab167ba4583681efbbce4b18941178
---
 .zuul.yaml                                    |   4 +
 gate/test_evacuate.sh                         | 142 ++++++++++++++++++
 nova/tests/live_migration/hooks/run_tests.sh  |   2 +
 playbooks/legacy/nova-live-migration/run.yaml |   1 +
 4 files changed, 149 insertions(+)
 create mode 100755 gate/test_evacuate.sh

diff --git a/.zuul.yaml b/.zuul.yaml
index 2c17b75fbb94..1ab46dcc528d 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -80,6 +80,10 @@
 - job:
     name: nova-live-migration
     parent: nova-dsvm-multinode-base
+    description: |
+      Run tempest live migration tests against both local storage and shared
+      storage using ceph (the environment is reconfigured for ceph after the
+      local storage tests are run). Also runs simple evacuate tests.
     run: playbooks/legacy/nova-live-migration/run.yaml
     post-run: playbooks/legacy/nova-live-migration/post.yaml
 
diff --git a/gate/test_evacuate.sh b/gate/test_evacuate.sh
new file mode 100755
index 000000000000..4b81c5f2368e
--- /dev/null
+++ b/gate/test_evacuate.sh
@@ -0,0 +1,142 @@
+#!/bin/bash -x
+#
+# Post-test hook that exercises server evacuation in a multinode job.
+#
+# A server is booted on the subnode, the subnode's nova-compute service is
+# forced down, and then two evacuations to the local host are attempted:
+#   1. Negative: libvirt is stopped on the local host first, so the forced
+#      evacuation is expected to leave the server in ERROR status.
+#   2. Positive: libvirt is restarted and, once the local compute service
+#      is enabled, the evacuation is expected to leave the server ACTIVE
+#      on the local host.
+#
+# Exit codes: 1 unsupported virt driver, 2 fewer than two compute nodes,
+# 3 subnode not found, 4 no ERROR status, 5 compute service not enabled,
+# 6 no ACTIVE status, 7 server ended up on an unexpected host.
+
+BASE=${BASE:-/opt/stack}
+# Source stackrc to determine the configured VIRT_DRIVER
+source "${BASE}/new/devstack/stackrc"
+
+set -e
+# We need to get the admin credentials to run CLIs.
+# Tracing is switched off while openrc is sourced so that the commands it
+# runs are not written to the trace output.
+set +x
+source "${BASE}/new/devstack/openrc" admin
+set -x
+
+# Number of one-second sleeps a wait performs before it is a timeout.
+EVACUATE_WAIT_TIMEOUT=30
+
+# Poll a command once per second until its output equals the expected value.
+# Arguments:
+#   $1 - expected output
+#   $2 - message echoed if the output never matches
+#   $3 - exit code used if the output never matches
+#   $4... - command (and its arguments) whose stdout is compared to $1
+# Exits the script with $3 after EVACUATE_WAIT_TIMEOUT sleeps without a match.
+function wait_for_output {
+    local expected=$1
+    local timeout_message=$2
+    local timeout_code=$3
+    shift 3
+    local count=0
+    local actual
+    # Assigned separately from "local" so that a failing command is not
+    # masked and still aborts the script under "set -e".
+    actual=$("$@")
+    while [[ "${actual}" != "${expected}" ]]; do
+        sleep 1
+        count=$((count+1))
+        if [[ ${count} -eq ${EVACUATE_WAIT_TIMEOUT} ]]; then
+            echo "${timeout_message}"
+            exit "${timeout_code}"
+        fi
+        actual=$("$@")
+    done
+}
+
+if [[ "${VIRT_DRIVER}" != libvirt ]]; then
+    echo "Only the libvirt driver is supported by this script"
+    exit 1
+fi
+
+echo "Ensure we have at least two compute nodes"
+nodenames=$(openstack hypervisor list -f value -c 'Hypervisor Hostname')
+node_count=$(wc -w <<< "${nodenames}")
+if [[ ${node_count} -lt 2 ]]; then
+    echo "Evacuate requires at least two nodes"
+    exit 2
+fi
+
+echo "Finding the subnode"
+subnode=''
+local_hostname=$(hostname -s)
+# nodenames is left unquoted on purpose: it is split into one name per word.
+for nodename in ${nodenames}; do
+    # The right-hand side is quoted so it is compared literally rather than
+    # being treated as a glob pattern.
+    if [[ "${local_hostname}" != "${nodename}" ]]; then
+        subnode=${nodename}
+        break
+    fi
+done
+
+# Sanity check that we found the subnode.
+if [[ -z "${subnode}" ]]; then
+    echo "Failed to find subnode from nodes: ${nodenames}"
+    exit 3
+fi
+
+echo "Creating test server on subnode"
+# Use the first word of the first listed image and flavor names.
+image=$(openstack image list -f value -c Name | awk 'NR==1{print $1}')
+flavor=$(openstack flavor list -f value -c Name | awk 'NR==1{print $1}')
+openstack server create --image "${image}" --flavor "${flavor}" \
+    --availability-zone "nova:${subnode}" --wait evacuate-test
+
+echo "Forcing down the subnode so we can evacuate from it"
+openstack --os-compute-api-version 2.11 compute service set --down \
+    "${subnode}" nova-compute
+
+echo "Stopping libvirt on the localhost before evacuating to trigger failure"
+sudo systemctl stop libvirt-bin
+
+# Now force the evacuation to *this* host; we have to force to bypass the
+# scheduler since we killed libvirtd which will trigger the libvirt compute
+# driver to auto-disable the nova-compute service and then the ComputeFilter
+# would filter out this host and we'd get NoValidHost. Normally forcing a host
+# during evacuate and bypassing the scheduler is a very bad idea, but we're
+# doing a negative test here.
+# TODO(mriedem): Use OSC when it supports evacuate.
+echo "Forcing evacuate to local host"
+nova evacuate --force evacuate-test "${local_hostname}"
+# Wait for the instance to go into ERROR state from the failed evacuate.
+wait_for_output ERROR \
+    "Timed out waiting for server to go to ERROR status" 4 \
+    openstack server show evacuate-test -f value -c status
+
+echo "Now restart libvirt and perform a successful evacuation"
+sudo systemctl start libvirt-bin
+# NOTE(review): presumably lets nova-compute notice libvirt is back; confirm.
+sleep 10
+
+# Wait for the compute service to be enabled.
+wait_for_output enabled \
+    "Timed out waiting for local compute service to be enabled" 5 \
+    openstack compute service list --host "${local_hostname}" \
+    --service nova-compute -f value -c Status
+
+nova evacuate evacuate-test
+# Wait for the instance to go into ACTIVE state from the evacuate.
+wait_for_output ACTIVE \
+    "Timed out waiting for server to go to ACTIVE status" 6 \
+    openstack server show evacuate-test -f value -c status
+
+# Make sure the server moved.
+host=$(openstack server show evacuate-test -f value -c OS-EXT-SRV-ATTR:host)
+if [[ "${host}" != "${local_hostname}" ]]; then
+    echo "Unexpected host ${host} for server after evacuate."
+    exit 7
+fi
diff --git a/nova/tests/live_migration/hooks/run_tests.sh b/nova/tests/live_migration/hooks/run_tests.sh
index f8683ecd862a..72e0bab83d76 100755
--- a/nova/tests/live_migration/hooks/run_tests.sh
+++ b/nova/tests/live_migration/hooks/run_tests.sh
@@ -28,6 +28,8 @@ echo '1. test with all local storage (use default for volumes)'
 echo 'NOTE: test_volume_backed_live_migration is skipped due to https://bugs.launchpad.net/nova/+bug/1524898'
 run_tempest "block migration test" "^.*test_live_migration(?!.*(test_volume_backed_live_migration))"
 
+# TODO(mriedem): Run $BASE/new/nova/gate/test_evacuate.sh for local storage
+
 #all tests bellow this line use shared storage, need to update tempest.conf
 echo 'disabling block_migration in tempest'
 $ANSIBLE primary --sudo -f 5 -i "$WORKSPACE/inventory" -m ini_file -a "dest=$BASE/new/tempest/etc/tempest.conf section=compute-feature-enabled option=block_migration_for_live_migration value=False"
diff --git a/playbooks/legacy/nova-live-migration/run.yaml b/playbooks/legacy/nova-live-migration/run.yaml
index 16307d8e068e..6e5d3a46d556 100644
--- a/playbooks/legacy/nova-live-migration/run.yaml
+++ b/playbooks/legacy/nova-live-migration/run.yaml
@@ -34,6 +34,7 @@
           export DEVSTACK_GATE_TOPOLOGY="multinode"
           function post_test_hook {
               /opt/stack/new/nova/nova/tests/live_migration/hooks/run_tests.sh
+              $BASE/new/nova/gate/test_evacuate.sh
           }
           export -f post_test_hook
           cp devstack-gate/devstack-vm-gate-wrap.sh ./safe-devstack-vm-gate-wrap.sh