Browse Source

zuul: Introduce nova-evacuate

This change reworks the evacuation parts of the original
nova-live-migration job into a zuulv3 native ansible role and initial
job covering local ephemeral and iSCSI/LVM volume attached instance
evacuation. Future jobs will cover ceph and other storage backends.

Change-Id: I380e9ca1e6a84da2b2ae577fb48781bf5c740e23
changes/83/744883/27
Lee Yarwood 1 year ago
parent
commit
f357d80407
  1. 19
      .zuul.yaml
  2. 8
      playbooks/nova-evacuate/run.yaml
  3. 1
      roles/run-evacuate-hook/README.rst
  4. 34
      roles/run-evacuate-hook/files/setup_evacuate_resources.sh
  5. 55
      roles/run-evacuate-hook/files/test_evacuate.sh
  6. 37
      roles/run-evacuate-hook/files/test_negative_evacuate.sh
  7. 58
      roles/run-evacuate-hook/tasks/main.yaml

19
.zuul.yaml

@ -95,13 +95,16 @@
#    description: |
#      Run tempest live migration tests against ceph ephemeral storage and
#      cinder volumes.
#
#- job:
#    name: nova-multinode-evacuate
#    description: |
#      Verify the evacuation of instances with local qcow2 ephemeral disks
#      from down compute hosts.
#
# Evacuate servers with local qcow2 ephemeral disks and attached
# iSCSI/LVM volumes away from a compute host that has been forced down.
- job:
    name: nova-evacuate
    parent: tempest-multinode-full-py3
    description: |
      Verify the evacuation of instances with local qcow2 ephemeral disks
      and attached iSCSI/LVM volumes from down compute hosts.
    irrelevant-files: *dsvm-irrelevant-files
    run: playbooks/nova-evacuate/run.yaml
#- job:
#    name: nova-multinode-evacuate-ceph
#    description: |
@ -441,6 +444,8 @@
- nova-live-migration
- nova-multinode-live-migration:
voting: false
- nova-evacuate:
voting: false
- nova-lvm
- nova-multi-cell
- nova-next

8
playbooks/nova-evacuate/run.yaml

@ -0,0 +1,8 @@
---
# Bring up the multinode devstack environment on every node first, then
# drive the evacuation tests from the controller only.
- hosts: all
  roles:
    - orchestrate-devstack

- hosts: controller
  roles:
    - run-evacuate-hook

1
roles/run-evacuate-hook/README.rst

@ -0,0 +1 @@
Run Nova evacuation tests against a multinode environment.

34
roles/run-evacuate-hook/files/setup_evacuate_resources.sh

@ -0,0 +1,34 @@
#!/bin/bash
# Create the test servers that will later be evacuated and force the
# subnode nova-compute service down so the API will accept evacuations.
#
# Required environment:
#   SUBNODE_HOSTNAME - hostname of the compute node that will be fenced.
source /opt/stack/devstack/openrc admin
set -x
set -e

# Fail fast with a clear message if the caller forgot to export this.
: "${SUBNODE_HOSTNAME:?SUBNODE_HOSTNAME must be set}"

# Pick the first available image, flavor and non-shared network.
image_id=$(openstack image list -f value -c ID | awk 'NR==1{print $1}')
flavor_id=$(openstack flavor list -f value -c ID | awk 'NR==1{print $1}')
network_id=$(openstack network list --no-share -f value -c ID | awk 'NR==1{print $1}')

echo "Creating ephemeral test server on subnode"
openstack --os-compute-api-version 2.74 server create \
    --image "${image_id}" --flavor "${flavor_id}" \
    --nic net-id="${network_id}" --host "${SUBNODE_HOSTNAME}" \
    --wait evacuate-test

# TODO(lyarwood) Use osc to launch the bfv volume
echo "Creating boot from volume test server on subnode"
nova --os-compute-api-version 2.74 boot --flavor "${flavor_id}" --poll \
    --block-device id="${image_id}",source=image,dest=volume,size=1,bootindex=0,shutdown=remove \
    --nic net-id="${network_id}" --host "${SUBNODE_HOSTNAME}" evacuate-bfv-test

echo "Forcing down the subnode so we can evacuate from it"
openstack --os-compute-api-version 2.11 compute service set --down "${SUBNODE_HOSTNAME}" nova-compute

# Poll (up to ~30s) until nova actually reports the service as down;
# evacuate is rejected while the service still looks up.
count=0
status=$(openstack compute service list --host "${SUBNODE_HOSTNAME}" --service nova-compute -f value -c State)
while [ "${status}" != "down" ]
do
    sleep 1
    count=$((count+1))
    if [ ${count} -eq 30 ]; then
        echo "Timed out waiting for subnode compute service to be marked as down"
        exit 5
    fi
    status=$(openstack compute service list --host "${SUBNODE_HOSTNAME}" --service nova-compute -f value -c State)
done

55
roles/run-evacuate-hook/files/test_evacuate.sh

@ -0,0 +1,55 @@
#!/bin/bash
# Evacuate the test servers to the controller and verify they land there.
#
# Required environment:
#   CONTROLLER_HOSTNAME - hostname of the evacuation target host.
#   BUILD_TIMEOUT       - provided by sourcing devstack's lib/tempest.
# Source tempest to determine the build timeout configuration.
source /opt/stack/devstack/lib/tempest
source /opt/stack/devstack/openrc admin
set -x
set -e

# Fail fast with a clear message if the caller forgot to export this.
: "${CONTROLLER_HOSTNAME:?CONTROLLER_HOSTNAME must be set}"

# Wait for the controller compute service to be enabled.
count=0
status=$(openstack compute service list --host "${CONTROLLER_HOSTNAME}" --service nova-compute -f value -c Status)
while [ "${status}" != "enabled" ]
do
    sleep 1
    count=$((count+1))
    if [ ${count} -eq 30 ]; then
        echo "Timed out waiting for controller compute service to be enabled"
        exit 5
    fi
    status=$(openstack compute service list --host "${CONTROLLER_HOSTNAME}" --service nova-compute -f value -c Status)
done

# Evacuate a single server and poll until it returns to ACTIVE.
function evacuate_and_wait_for_active() {
    local server="$1"
    local count=0
    local status

    nova evacuate "${server}"
    # Wait for the instance to go into ACTIVE state from the evacuate.
    status=$(openstack server show "${server}" -f value -c status)
    while [ "${status}" != "ACTIVE" ]
    do
        sleep 1
        count=$((count+1))
        if [ ${count} -eq ${BUILD_TIMEOUT} ]; then
            echo "Timed out waiting for server ${server} to go to ACTIVE status"
            exit 6
        fi
        status=$(openstack server show "${server}" -f value -c status)
    done
}

evacuate_and_wait_for_active evacuate-test
evacuate_and_wait_for_active evacuate-bfv-test

# Make sure the servers moved. Quote the RHS so the hostname is compared
# literally rather than being treated as a glob pattern.
for server in evacuate-test evacuate-bfv-test; do
    host=$(openstack server show "${server}" -f value -c OS-EXT-SRV-ATTR:host)
    if [[ "${host}" != "${CONTROLLER_HOSTNAME}" ]]; then
        echo "Unexpected host ${host} for server ${server} after evacuate."
        exit 7
    fi
done

# Cleanup test servers
openstack server delete --wait evacuate-test
openstack server delete --wait evacuate-bfv-test

37
roles/run-evacuate-hook/files/test_negative_evacuate.sh

@ -0,0 +1,37 @@
#!/bin/bash
# Force-evacuate the test servers onto this host while its libvirtd is
# stopped and verify the failed evacuations leave the servers in ERROR.
#
# Required environment:
#   CONTROLLER_HOSTNAME - hostname of the (currently fenced) target host.
#   BUILD_TIMEOUT       - provided by sourcing devstack's lib/tempest.
# Source tempest to determine the build timeout configuration.
source /opt/stack/devstack/lib/tempest
source /opt/stack/devstack/openrc admin
set -x
set -e

# Fail fast with a clear message if the caller forgot to export this.
: "${CONTROLLER_HOSTNAME:?CONTROLLER_HOSTNAME must be set}"

# Now force the evacuation to the controller; we have to force to bypass the
# scheduler since we killed libvirtd which will trigger the libvirt compute
# driver to auto-disable the nova-compute service and then the ComputeFilter
# would filter out this host and we'd get NoValidHost. Normally forcing a host
# during evacuate and bypassing the scheduler is a very bad idea, but we're
# doing a negative test here.
function evacuate_and_wait_for_error() {
    local server="$1"
    local count=0
    local status

    echo "Forcing evacuate of ${server} to local host"
    # TODO(mriedem): Use OSC when it supports evacuate.
    nova --os-compute-api-version "2.67" evacuate --force "${server}" "${CONTROLLER_HOSTNAME}"
    # Wait for the instance to go into ERROR state from the failed evacuate.
    status=$(openstack server show "${server}" -f value -c status)
    while [ "${status}" != "ERROR" ]
    do
        sleep 1
        count=$((count+1))
        if [ ${count} -eq ${BUILD_TIMEOUT} ]; then
            echo "Timed out waiting for server ${server} to go to ERROR status"
            exit 4
        fi
        status=$(openstack server show "${server}" -f value -c status)
    done
}

evacuate_and_wait_for_error evacuate-test
evacuate_and_wait_for_error evacuate-bfv-test

58
roles/run-evacuate-hook/tasks/main.yaml

@ -0,0 +1,58 @@
# Orchestrate the evacuation tests:
#  1. create test servers on the subnode and force its compute service down
#  2. fence the subnode (stop q-agt/n-cpu and destroy any running domains)
#  3. run the negative tests with libvirtd stopped on the target host
#  4. restart libvirtd and run the positive evacuation tests
- name: Setup resources and mark the subnode as forced down
  become: true
  become_user: stack
  shell: "/opt/stack/nova/roles/run-evacuate-hook/files/setup_evacuate_resources.sh"
  environment:
    SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}"

- name: Fence subnode by stopping q-agt and n-cpu
  delegate_to: compute1
  become: true
  systemd:
    name: "{{ item }}"
    state: stopped
  with_items:
    - devstack@q-agt
    - devstack@n-cpu

- name: Register running domains on subnode
  delegate_to: compute1
  become: true
  virt:
    command: list_vms
    state: running
  register: subnode_vms

- name: Destroy running domains on subnode
  delegate_to: compute1
  become: true
  virt:
    name: "{{ item }}"
    state: destroyed
  with_items: "{{ subnode_vms.list_vms }}"

- name: Stop libvirtd on "{{ inventory_hostname }}"
  become: true
  systemd:
    name: libvirtd
    state: stopped

- name: Run negative evacuate tests
  become: true
  become_user: stack
  shell: "/opt/stack/nova/roles/run-evacuate-hook/files/test_negative_evacuate.sh"
  environment:
    CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"

- name: Start libvirtd on "{{ inventory_hostname }}"
  become: true
  systemd:
    name: libvirtd
    state: started

- name: Run evacuate tests
  become: true
  become_user: stack
  shell: "/opt/stack/nova/roles/run-evacuate-hook/files/test_evacuate.sh"
  environment:
    CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"
Loading…
Cancel
Save