zuul: Replace nova-live-migration with zuulv3 jobs

This change removes the original nova-live-migration job and replaces it
directly with the new Focal based zuulv3 native job.

The nova-dsvm-multinode-base base job is no longer used and so is also
removed as part of this change.

Note that this new nova-live-migration job does not yet provide the ceph
coverage of the original job; this is still pending and will be completed
early in the W cycle.

This change is being merged ahead of that work to resolve bug #1901739, a
known QEMU -drive issue caused by the previous job's use of libvirt 5.4.0
as provided by Ubuntu Bionic. The fix here is the move to Ubuntu Focal
based jobs and libvirt 6.0.0, which now defaults to using QEMU -blockdev.
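
As a rough, illustrative check (not part of this change or the job
definition), the behaviour can be confirmed on a test node by comparing the
installed libvirt version with the QEMU command line of a running guest:

# Hedged sketch: on a Focal node libvirt reports 6.0.0 and guests it starts
# should reference -blockdev rather than -drive on the QEMU command line.
libvirtd --version        # 6.0.0 on Focal, 5.4.0 on Bionic
ps -ef | grep '[q]emu-system' | grep -o -- '-blockdev' | head -1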

NOTE(lyarwood): This change squashes the following changes into it to
ensure we end up with a passing zuulv3 Focal based job in
stable/victoria. This includes the reintroduction of
nova-dsvm-multinode-base, which was incorrectly removed by this change on
master while it was still in use.

zuul: Introduce nova-evacuate
(cherry picked from commit f357d80407)

nova-evacuate: Disable libvirtd service and sockets during negative tests
(cherry picked from commit 226250beb6)

zuul: Merge nova-evacuate into nova-multinode-live-migration
(cherry picked from commit c0fe95fcc5)

zuul: Reintroduce nova-dsvm-multinode-base
(cherry picked from commit be752b8175)

nova-live-migration: Disable *all* virt services during negative tests
(cherry picked from commit 76360e566b)

Closes-Bug: #1901739
Change-Id: Ib342e2d3c395830b4667a60de7e492d3b9de2f0a
(cherry picked from commit 4ac4a04d18)
Lee Yarwood 2020-08-05 11:40:11 +01:00
parent a806b1dc4c
commit 478be6f4fb
8 changed files with 228 additions and 22 deletions

@@ -56,21 +56,8 @@
      bindep_profile: test py38
    timeout: 3600

# TODO(lyarwood): Remove once the new zuulv3 nova-multinode jobs are voting
- job:
    name: nova-live-migration
    parent: nova-dsvm-multinode-base
    description: |
      Run tempest live migration tests against both local storage and shared
      storage using ceph (the environment is reconfigured for ceph after the
      local storage tests are run). Also runs simple evacuate tests.
      Config drive is forced on all instances. Runs with python 3.
    run: playbooks/legacy/nova-live-migration/run.yaml
    post-run: playbooks/legacy/nova-live-migration/post.yaml
    irrelevant-files: *dsvm-irrelevant-files

- job:
    name: nova-multinode-live-migration
    parent: tempest-multinode-full-py3
    description: |
      Run tempest live migration tests against local qcow2 ephemeral storage
@@ -86,6 +73,7 @@
              volume_backed_live_migration: true
              block_migration_for_live_migration: true
              block_migrate_cinder_iscsi: true
    post-run: playbooks/nova-live-migration/post-run.yaml

# TODO(lyarwood): The following jobs need to be written as part of the
# migration to zuulv3 before nova-live-migration can be removed:
@@ -95,13 +83,6 @@
#    description: |
#      Run tempest live migration tests against ceph ephemeral storage and
#      cinder volumes.
#
#- job:
#    name: nova-multinode-evacuate
#    description: |
#      Verify the evacuation of instances with local qcow2 ephemeral disks
#      from down compute hosts.
#
#- job:
#    name: nova-multinode-evacuate-ceph
#    description: |
@@ -439,8 +420,6 @@
              - ^(?!nova/network/.*)(?!nova/virt/libvirt/vif.py).*$
        - nova-grenade-multinode
        - nova-live-migration
        - nova-multinode-live-migration:
            voting: false
        - nova-lvm
        - nova-multi-cell
        - nova-next

@@ -0,0 +1,8 @@
---
- hosts: all
  roles:
    - orchestrate-devstack

- hosts: controller
  roles:
    - run-evacuate-hook

@@ -0,0 +1,10 @@
---
- hosts: tempest
  become: true
  roles:
    - role: fetch-subunit-output
      zuul_work_dir: '{{ devstack_base_dir }}/tempest'
    - role: process-stackviz
- hosts: controller
  roles:
    - run-evacuate-hook

@@ -0,0 +1 @@
Run Nova evacuation tests against a multinode environment.

@@ -0,0 +1,34 @@
#!/bin/bash
source /opt/stack/devstack/openrc admin
set -x
set -e

image_id=$(openstack image list -f value -c ID | awk 'NR==1{print $1}')
flavor_id=$(openstack flavor list -f value -c ID | awk 'NR==1{print $1}')
network_id=$(openstack network list --no-share -f value -c ID | awk 'NR==1{print $1}')

echo "Creating ephemeral test server on subnode"
openstack --os-compute-api-version 2.74 server create --image ${image_id} --flavor ${flavor_id} \
    --nic net-id=${network_id} --host $SUBNODE_HOSTNAME --wait evacuate-test

# TODO(lyarwood) Use osc to launch the bfv volume
echo "Creating boot from volume test server on subnode"
nova --os-compute-api-version 2.74 boot --flavor ${flavor_id} --poll \
    --block-device id=${image_id},source=image,dest=volume,size=1,bootindex=0,shutdown=remove \
    --nic net-id=${network_id} --host ${SUBNODE_HOSTNAME} evacuate-bfv-test

echo "Forcing down the subnode so we can evacuate from it"
openstack --os-compute-api-version 2.11 compute service set --down ${SUBNODE_HOSTNAME} nova-compute

count=0
status=$(openstack compute service list --host ${SUBNODE_HOSTNAME} --service nova-compute -f value -c State)
while [ "${status}" != "down" ]
do
    sleep 1
    count=$((count+1))
    if [ ${count} -eq 30 ]; then
        echo "Timed out waiting for subnode compute service to be marked as down"
        exit 5
    fi
    status=$(openstack compute service list --host ${SUBNODE_HOSTNAME} --service nova-compute -f value -c State)
done

@@ -0,0 +1,55 @@
#!/bin/bash
# Source tempest to determine the build timeout configuration.
source /opt/stack/devstack/lib/tempest
source /opt/stack/devstack/openrc admin
set -x
set -e

# Wait for the controller compute service to be enabled.
count=0
status=$(openstack compute service list --host ${CONTROLLER_HOSTNAME} --service nova-compute -f value -c Status)
while [ "${status}" != "enabled" ]
do
    sleep 1
    count=$((count+1))
    if [ ${count} -eq 30 ]; then
        echo "Timed out waiting for controller compute service to be enabled"
        exit 5
    fi
    status=$(openstack compute service list --host ${CONTROLLER_HOSTNAME} --service nova-compute -f value -c Status)
done

function evacuate_and_wait_for_active() {
    local server="$1"

    nova evacuate ${server}
    # Wait for the instance to go into ACTIVE state from the evacuate.
    count=0
    status=$(openstack server show ${server} -f value -c status)
    while [ "${status}" != "ACTIVE" ]
    do
        sleep 1
        count=$((count+1))
        if [ ${count} -eq ${BUILD_TIMEOUT} ]; then
            echo "Timed out waiting for server ${server} to go to ACTIVE status"
            exit 6
        fi
        status=$(openstack server show ${server} -f value -c status)
    done
}

evacuate_and_wait_for_active evacuate-test
evacuate_and_wait_for_active evacuate-bfv-test

# Make sure the servers moved.
for server in evacuate-test evacuate-bfv-test; do
    host=$(openstack server show ${server} -f value -c OS-EXT-SRV-ATTR:host)
    if [[ ${host} != ${CONTROLLER_HOSTNAME} ]]; then
        echo "Unexpected host ${host} for server ${server} after evacuate."
        exit 7
    fi
done

# Cleanup test servers
openstack server delete --wait evacuate-test
openstack server delete --wait evacuate-bfv-test

@@ -0,0 +1,37 @@
#!/bin/bash
# Source tempest to determine the build timeout configuration.
source /opt/stack/devstack/lib/tempest
source /opt/stack/devstack/openrc admin
set -x
set -e

# Now force the evacuation to the controller; we have to force to bypass the
# scheduler since we killed libvirtd which will trigger the libvirt compute
# driver to auto-disable the nova-compute service and then the ComputeFilter
# would filter out this host and we'd get NoValidHost. Normally forcing a host
# during evacuate and bypassing the scheduler is a very bad idea, but we're
# doing a negative test here.
function evacuate_and_wait_for_error() {
    local server="$1"

    echo "Forcing evacuate of ${server} to local host"
    # TODO(mriedem): Use OSC when it supports evacuate.
    nova --os-compute-api-version "2.67" evacuate --force ${server} ${CONTROLLER_HOSTNAME}

    # Wait for the instance to go into ERROR state from the failed evacuate.
    count=0
    status=$(openstack server show ${server} -f value -c status)
    while [ "${status}" != "ERROR" ]
    do
        sleep 1
        count=$((count+1))
        if [ ${count} -eq ${BUILD_TIMEOUT} ]; then
            echo "Timed out waiting for server ${server} to go to ERROR status"
            exit 4
        fi
        status=$(openstack server show ${server} -f value -c status)
    done
}

evacuate_and_wait_for_error evacuate-test
evacuate_and_wait_for_error evacuate-bfv-test
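
For context, the auto-disable behaviour described in the comment above can be
observed directly once libvirtd has been stopped; a minimal check (not part of
the hook itself) would be:

# Hedged sketch: with libvirtd stopped, the libvirt driver auto-disables the
# local nova-compute service, which is why the evacuation above needs --force
# to bypass the scheduler's ComputeFilter.
openstack compute service list --service nova-compute -f value -c Host -c Status -c State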

@@ -0,0 +1,82 @@
- name: Setup resources and mark the subnode as forced down
  become: true
  become_user: stack
  shell: "/opt/stack/nova/roles/run-evacuate-hook/files/setup_evacuate_resources.sh"
  environment:
    SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}"

- name: Fence subnode by stopping q-agt and n-cpu
  delegate_to: compute1
  become: true
  systemd:
    name: "{{ item }}"
    state: stopped
  with_items:
    - devstack@q-agt
    - devstack@n-cpu

- name: Register running domains on subnode
  delegate_to: compute1
  become: true
  virt:
    command: list_vms
    state: running
  register: subnode_vms

- name: Destroy running domains on subnode
  delegate_to: compute1
  become: true
  virt:
    name: "{{ item }}"
    state: destroyed
  with_items: "{{ subnode_vms.list_vms }}"

- name: Stop libvirtd on "{{ inventory_hostname }}"
  become: true
  systemd:
    name: "{{ item }}"
    state: stopped
    enabled: no
  with_items:
    - libvirtd.service
    - libvirtd.socket
    - libvirtd-admin.socket
    - libvirtd-ro.socket
    - virtlogd.service
    - virtlogd-admin.socket
    - virtlogd.socket
    - virtlockd.service
    - virtlockd-admin.socket
    - virtlockd.socket

- name: Run negative evacuate tests
  become: true
  become_user: stack
  shell: "/opt/stack/nova/roles/run-evacuate-hook/files/test_negative_evacuate.sh"
  environment:
    CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"

- name: Start libvirtd on "{{ inventory_hostname }}"
  become: true
  systemd:
    name: "{{ item }}"
    state: started
    enabled: yes
  with_items:
    - libvirtd.service
    - libvirtd.socket
    - libvirtd-admin.socket
    - libvirtd-ro.socket
    - virtlogd.service
    - virtlogd-admin.socket
    - virtlogd.socket
    - virtlockd.service
    - virtlockd-admin.socket
    - virtlockd.socket

- name: Run evacuate tests
  become: true
  become_user: stack
  shell: "/opt/stack/nova/roles/run-evacuate-hook/files/test_evacuate.sh"
  environment:
    CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"
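
For reference, the hook scripts the role drives can also be exercised by hand
on a devstack controller, assuming nova is checked out under /opt/stack/nova
as it is in the job; a minimal usage sketch:

# Hedged sketch: run the positive evacuate checks manually as the stack user,
# passing the controller hostname the same way the Ansible task above does.
sudo -u stack env CONTROLLER_HOSTNAME="$(hostname)" \
    bash /opt/stack/nova/roles/run-evacuate-hook/files/test_evacuate.sh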