From f357d8040741b0346c3105fb3d3d1b260f5cb13d Mon Sep 17 00:00:00 2001
From: Lee Yarwood
Date: Wed, 5 Aug 2020 11:40:11 +0100
Subject: [PATCH] zuul: Introduce nova-evacuate

This change reworks the evacuation parts of the original
nova-live-migration job into a zuulv3-native Ansible role and an
initial job covering evacuation of instances with local ephemeral
disks and attached iSCSI/LVM volumes. Future jobs will cover Ceph and
other storage backends.

Change-Id: I380e9ca1e6a84da2b2ae577fb48781bf5c740e23
---
 .zuul.yaml                                    | 19 +++---
 playbooks/nova-evacuate/run.yaml              |  8 +++
 roles/run-evacuate-hook/README.rst            |  1 +
 .../files/setup_evacuate_resources.sh         | 34 +++++++++++
 .../run-evacuate-hook/files/test_evacuate.sh  | 55 ++++++++++++++++++
 .../files/test_negative_evacuate.sh           | 37 ++++++++++++
 roles/run-evacuate-hook/tasks/main.yaml       | 58 +++++++++++++++++++
 7 files changed, 205 insertions(+), 7 deletions(-)
 create mode 100644 playbooks/nova-evacuate/run.yaml
 create mode 100644 roles/run-evacuate-hook/README.rst
 create mode 100755 roles/run-evacuate-hook/files/setup_evacuate_resources.sh
 create mode 100755 roles/run-evacuate-hook/files/test_evacuate.sh
 create mode 100755 roles/run-evacuate-hook/files/test_negative_evacuate.sh
 create mode 100644 roles/run-evacuate-hook/tasks/main.yaml

diff --git a/.zuul.yaml b/.zuul.yaml
index b4f67067e47f..212f67be4040 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -95,13 +95,16 @@
 #    description: |
 #      Run tempest live migration tests against ceph ephemeral storage and
 #      cinder volumes.
-#
-#- job:
-#    name: nova-multinode-evacuate
-#    description: |
-#      Verifiy the evacuation of instances with local qcow2 ephemeral disks
-#      from down compute hosts.
-#
+
+- job:
+    name: nova-evacuate
+    parent: tempest-multinode-full-py3
+    description: |
+      Verify the evacuation of instances with local qcow2 ephemeral disks
+      and attached iSCSI/LVM volumes from down compute hosts.
+    irrelevant-files: *dsvm-irrelevant-files
+    run: playbooks/nova-evacuate/run.yaml
+
 #- job:
 #    name: nova-multinode-evacuate-ceph
 #    description: |
@@ -441,6 +444,8 @@
         - nova-live-migration
         - nova-multinode-live-migration:
             voting: false
+        - nova-evacuate:
+            voting: false
         - nova-lvm
         - nova-multi-cell
         - nova-next
diff --git a/playbooks/nova-evacuate/run.yaml b/playbooks/nova-evacuate/run.yaml
new file mode 100644
index 000000000000..35e330a6de22
--- /dev/null
+++ b/playbooks/nova-evacuate/run.yaml
@@ -0,0 +1,8 @@
+---
+- hosts: all
+  roles:
+    - orchestrate-devstack
+
+- hosts: controller
+  roles:
+    - run-evacuate-hook
diff --git a/roles/run-evacuate-hook/README.rst b/roles/run-evacuate-hook/README.rst
new file mode 100644
index 000000000000..e423455aeead
--- /dev/null
+++ b/roles/run-evacuate-hook/README.rst
@@ -0,0 +1 @@
+Run Nova evacuation tests against a multinode environment.
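
Note: the Zuul wiring above only attaches the new playbook; the actual
test logic lives in the run-evacuate-hook role introduced below. As a
rough sketch only (not part of this change), the playbook can be
exercised outside the gate much as Zuul runs it, assuming an Ansible
inventory that defines the controller and compute1 hosts the role
expects and that the devstack CI roles (such as orchestrate-devstack)
are on the role path; the inventory path here is illustrative:

    ansible-playbook -i /path/to/multinode-inventory playbooks/nova-evacuate/run.yaml
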
diff --git a/roles/run-evacuate-hook/files/setup_evacuate_resources.sh b/roles/run-evacuate-hook/files/setup_evacuate_resources.sh
new file mode 100755
index 000000000000..c8c385d7ff48
--- /dev/null
+++ b/roles/run-evacuate-hook/files/setup_evacuate_resources.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+source /opt/stack/devstack/openrc admin
+set -x
+set -e
+
+image_id=$(openstack image list -f value -c ID | awk 'NR==1{print $1}')
+flavor_id=$(openstack flavor list -f value -c ID | awk 'NR==1{print $1}')
+network_id=$(openstack network list --no-share -f value -c ID | awk 'NR==1{print $1}')
+
+echo "Creating ephemeral test server on subnode"
+openstack --os-compute-api-version 2.74 server create --image ${image_id} --flavor ${flavor_id} \
+--nic net-id=${network_id} --host ${SUBNODE_HOSTNAME} --wait evacuate-test
+
+# TODO(lyarwood): Use osc to launch the bfv server.
+echo "Creating boot from volume test server on subnode"
+nova --os-compute-api-version 2.74 boot --flavor ${flavor_id} --poll \
+--block-device id=${image_id},source=image,dest=volume,size=1,bootindex=0,shutdown=remove \
+--nic net-id=${network_id} --host ${SUBNODE_HOSTNAME} evacuate-bfv-test
+
+echo "Forcing down the subnode so we can evacuate from it"
+openstack --os-compute-api-version 2.11 compute service set --down ${SUBNODE_HOSTNAME} nova-compute
+
+count=0
+status=$(openstack compute service list --host ${SUBNODE_HOSTNAME} --service nova-compute -f value -c State)
+while [ "${status}" != "down" ]
+do
+    sleep 1
+    count=$((count+1))
+    if [ ${count} -eq 30 ]; then
+        echo "Timed out waiting for subnode compute service to be marked as down"
+        exit 5
+    fi
+    status=$(openstack compute service list --host ${SUBNODE_HOSTNAME} --service nova-compute -f value -c State)
+done
diff --git a/roles/run-evacuate-hook/files/test_evacuate.sh b/roles/run-evacuate-hook/files/test_evacuate.sh
new file mode 100755
index 000000000000..bdf8d9244104
--- /dev/null
+++ b/roles/run-evacuate-hook/files/test_evacuate.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Source tempest to determine the build timeout configuration.
+source /opt/stack/devstack/lib/tempest
+source /opt/stack/devstack/openrc admin
+set -x
+set -e
+
+# Wait for the controller compute service to be enabled.
+count=0
+status=$(openstack compute service list --host ${CONTROLLER_HOSTNAME} --service nova-compute -f value -c Status)
+while [ "${status}" != "enabled" ]
+do
+    sleep 1
+    count=$((count+1))
+    if [ ${count} -eq 30 ]; then
+        echo "Timed out waiting for controller compute service to be enabled"
+        exit 5
+    fi
+    status=$(openstack compute service list --host ${CONTROLLER_HOSTNAME} --service nova-compute -f value -c Status)
+done
+
+function evacuate_and_wait_for_active() {
+    local server="$1"
+
+    nova evacuate ${server}
+    # Wait for the instance to go into ACTIVE state from the evacuate.
+    count=0
+    status=$(openstack server show ${server} -f value -c status)
+    while [ "${status}" != "ACTIVE" ]
+    do
+        sleep 1
+        count=$((count+1))
+        if [ ${count} -eq ${BUILD_TIMEOUT} ]; then
+            echo "Timed out waiting for server ${server} to go to ACTIVE status"
+            exit 6
+        fi
+        status=$(openstack server show ${server} -f value -c status)
+    done
+}
+
+evacuate_and_wait_for_active evacuate-test
+evacuate_and_wait_for_active evacuate-bfv-test
+
+# Make sure the servers moved.
+for server in evacuate-test evacuate-bfv-test; do
+    host=$(openstack server show ${server} -f value -c OS-EXT-SRV-ATTR:host)
+    if [[ ${host} != ${CONTROLLER_HOSTNAME} ]]; then
+        echo "Unexpected host ${host} for server ${server} after evacuate."
+        exit 7
+    fi
+done
+
+# Cleanup test servers
+openstack server delete --wait evacuate-test
+openstack server delete --wait evacuate-bfv-test
diff --git a/roles/run-evacuate-hook/files/test_negative_evacuate.sh b/roles/run-evacuate-hook/files/test_negative_evacuate.sh
new file mode 100755
index 000000000000..b1f5f7a4af3b
--- /dev/null
+++ b/roles/run-evacuate-hook/files/test_negative_evacuate.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Source tempest to determine the build timeout configuration.
+source /opt/stack/devstack/lib/tempest
+source /opt/stack/devstack/openrc admin
+set -x
+set -e
+
+# Now force the evacuation to the controller. We have to force the host and
+# bypass the scheduler because stopping libvirtd makes the libvirt compute
+# driver auto-disable the nova-compute service, so the ComputeFilter would
+# filter out this host and we would get NoValidHost. Normally forcing a host
+# during evacuate and bypassing the scheduler is a very bad idea, but we're
+# doing a negative test here.
+
+function evacuate_and_wait_for_error() {
+    local server="$1"
+
+    echo "Forcing evacuate of ${server} to local host"
+    # TODO(mriedem): Use OSC when it supports evacuate.
+    nova --os-compute-api-version "2.67" evacuate --force ${server} ${CONTROLLER_HOSTNAME}
+    # Wait for the instance to go into ERROR state from the failed evacuate.
+    count=0
+    status=$(openstack server show ${server} -f value -c status)
+    while [ "${status}" != "ERROR" ]
+    do
+        sleep 1
+        count=$((count+1))
+        if [ ${count} -eq ${BUILD_TIMEOUT} ]; then
+            echo "Timed out waiting for server ${server} to go to ERROR status"
+            exit 4
+        fi
+        status=$(openstack server show ${server} -f value -c status)
+    done
+}
+
+evacuate_and_wait_for_error evacuate-test
+evacuate_and_wait_for_error evacuate-bfv-test
diff --git a/roles/run-evacuate-hook/tasks/main.yaml b/roles/run-evacuate-hook/tasks/main.yaml
new file mode 100644
index 000000000000..755dcdf02d7a
--- /dev/null
+++ b/roles/run-evacuate-hook/tasks/main.yaml
@@ -0,0 +1,58 @@
+- name: Setup resources and mark the subnode as forced down
+  become: true
+  become_user: stack
+  shell: "/opt/stack/nova/roles/run-evacuate-hook/files/setup_evacuate_resources.sh"
+  environment:
+    SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}"
+
+- name: Fence subnode by stopping q-agt and n-cpu
+  delegate_to: compute1
+  become: true
+  systemd:
+    name: "{{ item }}"
+    state: stopped
+  with_items:
+    - devstack@q-agt
+    - devstack@n-cpu
+
+- name: Register running domains on subnode
+  delegate_to: compute1
+  become: true
+  virt:
+    command: list_vms
+    state: running
+  register: subnode_vms
+
+- name: Destroy running domains on subnode
+  delegate_to: compute1
+  become: true
+  virt:
+    name: "{{ item }}"
+    state: destroyed
+  with_items: "{{ subnode_vms.list_vms }}"
+
+- name: Stop libvirtd on "{{ inventory_hostname }}"
+  become: true
+  systemd:
+    name: libvirtd
+    state: stopped
+
+- name: Run negative evacuate tests
+  become: true
+  become_user: stack
+  shell: "/opt/stack/nova/roles/run-evacuate-hook/files/test_negative_evacuate.sh"
+  environment:
+    CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"
+
+- name: Start libvirtd on "{{ inventory_hostname }}"
+  become: true
+  systemd:
+    name: libvirtd
+    state: started
+
+- name: Run evacuate tests
+  become: true
+  become_user: stack
+  shell: "/opt/stack/nova/roles/run-evacuate-hook/files/test_evacuate.sh"
+  environment:
+    CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"
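
Note: the hook intentionally leaves the subnode fenced once the tests
pass (q-agt and n-cpu stopped, its domains destroyed, and the compute
service still forced down), which is fine in the gate because the
nodes are discarded after the job. If reproducing this locally, a
rough cleanup sketch, reusing the unit and service names from the
tasks above (the subnode hostname being whatever was exported as
SUBNODE_HOSTNAME), would be:

    # On the subnode: restart the services stopped by the fencing task.
    sudo systemctl start devstack@n-cpu devstack@q-agt

    # From the controller: clear the forced-down flag set by
    # setup_evacuate_resources.sh.
    source /opt/stack/devstack/openrc admin
    openstack --os-compute-api-version 2.11 compute service set --up ${SUBNODE_HOSTNAME} nova-compute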