Add more tests for graceful shutdown
Adding more tests for graceful shutdown: - shut down the destination compute and see how live and cold migrations progress - start building an instance and, once the compute has started building it, shut down the compute service and see whether the build finishes or not. - revert resize server Partial implement blueprint nova-services-graceful-shutdown-part1 Change-Id: I57132fb7b7fa614dfc138508581ff5a67aaed906 Signed-off-by: Ghanshyam Maan <gmaan.os14@gmail.com>
This commit is contained in:
43
roles/run-graceful-shutdown-tests/files/build_instance.sh
Executable file
43
roles/run-graceful-shutdown-tests/files/build_instance.sh
Executable file
@@ -0,0 +1,43 @@
|
||||
#!/bin/bash
# Boot a test server named "server-build" on the subnode compute and return as
# soon as the build is in flight, so that the caller can send SIGTERM to the
# compute service while the instance is still building.
#
# Required environment:
#   SUBNODE_HOSTNAME - compute host passed to "server create --host".
#
# Exit status:
#   0 - the server reached the "building" vm_state.
#   1 - a required input (SUBNODE_HOSTNAME, image, flavor or network) is missing.
#   2 - the server became active or went to error before "building" was
#       observed, or the wait timed out.
# Any failing openstack command also aborts the script because of "set -e".
source /opt/stack/devstack/openrc admin
set -x
set -e

# Fail with a clear message instead of letting an empty value shift the
# "server create" arguments (an empty --host would swallow the server name).
if [[ -z "${SUBNODE_HOSTNAME:-}" ]]; then
  echo "SUBNODE_HOSTNAME must be set" >&2
  exit 1
fi

# Maximum number of polls (one second apart) while waiting for "building".
timeout=60

# Use the first image, flavor and non-shared network that the cloud lists.
image_id=$(openstack image list -f value -c ID | awk 'NR==1{print $1}')
flavor_id=$(openstack flavor list -f value -c ID | awk 'NR==1{print $1}')
network_id=$(openstack network list --no-share -f value -c ID | awk 'NR==1{print $1}')

if [[ -z "${image_id}" || -z "${flavor_id}" || -z "${network_id}" ]]; then
  echo "Could not find an image, flavor and network to boot the test server" >&2
  exit 1
fi

echo "Creating test server on subnode"
# No --wait here: the script must return while the build is still in progress.
openstack --os-compute-api-version 2.74 server create \
  --image "${image_id}" --flavor "${flavor_id}" \
  --nic net-id="${network_id}" --host "${SUBNODE_HOSTNAME}" server-build

# Wait for the server vm_state to reach BUILDING so that we know that compute
# has started the build request.
count=0
while true; do
  vm_state=$(openstack server show server-build -f value -c OS-EXT-STS:vm_state)

  if [[ "${vm_state}" == "building" ]]; then
    echo "Server is in Building"
    break
  fi

  # The build finished too early; there is nothing left to interrupt.
  if [[ "${vm_state}" == "active" ]]; then
    echo "Server became active before SIGTERM was sent"
    exit 2
  fi

  if [[ "${vm_state}" == "error" ]]; then
    echo "Server went to error vm_state"
    exit 2
  fi

  sleep 1
  count=$((count+1))
  if [[ "${count}" -eq "${timeout}" ]]; then
    echo "Timed out waiting for server to reach BUILDING vm_state"
    exit 2
  fi
done
|
||||
88
roles/run-graceful-shutdown-tests/files/start_revert_resize.sh
Executable file
88
roles/run-graceful-shutdown-tests/files/start_revert_resize.sh
Executable file
@@ -0,0 +1,88 @@
|
||||
#!/bin/bash
# Set up an in-flight revert resize for the graceful shutdown test:
#   1. boot "server-rr" on the subnode,
#   2. cold migrate it to the controller and wait for VERIFY_RESIZE,
#   3. start the revert resize and return as soon as the server task_state is
#      "resize_reverting", so the caller can send SIGTERM while it is running.
#
# Required environment:
#   SUBNODE_HOSTNAME    - compute host the server is created on.
#   CONTROLLER_HOSTNAME - compute host the server is cold migrated to.
#
# Exit status:
#   0 - the revert resize was observed in progress.
#   1 - a required input (environment variable, image, flavor or network) is
#       missing.
#   2 - the migration or the revert could not be caught in progress (error,
#       timeout, or the revert completed too early).
# Any failing openstack command also aborts the script because of "set -e".
source /opt/stack/devstack/openrc admin
set -x
set -e

# Fail with a clear message instead of letting an empty value shift the
# arguments of the openstack commands below.
if [[ -z "${SUBNODE_HOSTNAME:-}" || -z "${CONTROLLER_HOSTNAME:-}" ]]; then
  echo "SUBNODE_HOSTNAME and CONTROLLER_HOSTNAME must be set" >&2
  exit 1
fi

# Maximum number of polls while waiting for the revert resize to start. This
# counts loop iterations, not seconds (that loop does not sleep between polls).
timeout=196
# Maximum number of polls (five seconds apart) while waiting for the cold
# migration to reach VERIFY_RESIZE.
migrate_poll_limit=20

# Use the first image, flavor and non-shared network that the cloud lists.
image_id=$(openstack image list -f value -c ID | awk 'NR==1{print $1}')
flavor_id=$(openstack flavor list -f value -c ID | awk 'NR==1{print $1}')
network_id=$(openstack network list --no-share -f value -c ID | awk 'NR==1{print $1}')

if [[ -z "${image_id}" || -z "${flavor_id}" || -z "${network_id}" ]]; then
  echo "Could not find an image, flavor and network to boot the test server" >&2
  exit 1
fi

echo "Creating test server on subnode for graceful shutdown revert resize test"
openstack --os-compute-api-version 2.74 server create \
  --image "${image_id}" --flavor "${flavor_id}" \
  --nic net-id="${network_id}" --host "${SUBNODE_HOSTNAME}" --wait server-rr

echo "Migrate server-rr to ${CONTROLLER_HOSTNAME}"
openstack --os-compute-api-version 2.56 server migrate \
  --host "${CONTROLLER_HOSTNAME}" server-rr

# Wait for the migrate to complete
count=0
while true; do
  status=$(openstack server show server-rr -f value -c status)
  if [[ "${status}" == "VERIFY_RESIZE" ]]; then
    echo "Migration completed, server is in VERIFY_RESIZE state"
    break
  fi
  if [[ "${status}" == "ERROR" ]]; then
    echo "Server went to ERROR status during cold migration"
    exit 2
  fi
  sleep 5
  count=$((count+1))
  if [[ "${count}" -eq "${migrate_poll_limit}" ]]; then
    echo "Timed out waiting for server-rr to reach VERIFY_RESIZE"
    exit 2
  fi
done

# Start the revert resize. The status is read again so that the revert is only
# requested while the server is still waiting for confirm/revert.
status=$(openstack server show server-rr -f value -c status)
if [[ "${status}" != "VERIFY_RESIZE" ]]; then
  echo "Revert resize skipped"
  exit 2
fi
echo "Starting revert resize of server-rr"
openstack server resize revert server-rr

# Wait for the revert resize to be in progress.
# NOTE(review): there is no sleep between polls; presumably this is deliberate
# so that the short-lived "resize_reverting" task_state is not missed - confirm.
count=0
while true; do
  task_state=$(openstack server show server-rr -f value -c OS-EXT-STS:task_state)
  status=$(openstack server show server-rr -f value -c status)

  if [[ "${task_state}" == "resize_reverting" ]]; then
    echo "Revert resize is in progress"
    # task_state is set by the API before it sends the revert_resize RPC call
    # to compute. Sleep for 2 seconds to give compute a chance to start the
    # revert_resize so that the shutdown can be initiated before it finishes.
    # This is best effort; there is no guarantee for that timing.
    sleep 2
    break
  fi

  if [[ "${status}" == "ACTIVE" ]]; then
    echo "Revert resize appears to have already completed"
    echo "Revert resize completed before SIGTERM was sent"
    exit 2
  fi

  count=$((count+1))
  if [[ "${count}" -eq "${timeout}" ]]; then
    echo "Timed out waiting for revert resize to start"
    exit 2
  fi
done
|
||||
25
roles/run-graceful-shutdown-tests/files/verify_build_instance.sh
Executable file
25
roles/run-graceful-shutdown-tests/files/verify_build_instance.sh
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash
# Wait for the test server "server-build" to finish building and become ACTIVE,
# which confirms that the build completed during graceful shutdown.
#
# Optional environment:
#   VERIFY_BUILD_TIMEOUT - maximum number of seconds to wait (default 300).
#
# Exit status:
#   0 - the server became ACTIVE.
#   6 - the server went to ERROR status.
#   7 - the server reached neither ACTIVE nor ERROR within the timeout.
# Any failing openstack command also aborts the script because of "set -e".
source /opt/stack/devstack/openrc admin
set -x
set -e

# Bound the wait so that a server stuck in an intermediate status fails this
# script instead of blocking the job until its global timeout.
timeout=${VERIFY_BUILD_TIMEOUT:-300}

build_start=$(date +%s)
while true; do
  status=$(openstack server show server-build -f value -c status)

  if [[ "${status}" == "ACTIVE" ]]; then
    build_end=$(date +%s)
    build_duration=$((build_end - build_start))
    echo "Build completed in ${build_duration} seconds."
    break
  fi

  if [[ "${status}" == "ERROR" ]]; then
    echo "Server went to ERROR status."
    exit 6
  fi

  now=$(date +%s)
  if (( now - build_start >= timeout )); then
    echo "Timed out after ${timeout} seconds waiting for server-build to become ACTIVE (last status: ${status})"
    exit 7
  fi

  sleep 5
done
|
||||
35
roles/run-graceful-shutdown-tests/files/verify_revert_resize.sh
Executable file
35
roles/run-graceful-shutdown-tests/files/verify_revert_resize.sh
Executable file
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
# Wait for the revert resize of "server-rr" to finish and check that the server
# ended up back on the subnode.
#
# Required environment:
#   SUBNODE_HOSTNAME - compute host the server is expected to be on afterwards.
# Optional environment:
#   VERIFY_REVERT_TIMEOUT - maximum number of seconds to wait (default 300).
#
# Exit status:
#   0 - the revert completed and the server is ACTIVE on SUBNODE_HOSTNAME.
#   1 - SUBNODE_HOSTNAME is not set.
#   3 - the server went to ERROR status.
#   4 - the server is ACTIVE but on an unexpected host.
#   5 - the revert did not finish within the timeout.
# Any failing openstack command also aborts the script because of "set -e".
source /opt/stack/devstack/openrc admin
set -x
set -e

# Without this check an unset variable would surface later as a misleading
# "Unexpected host" failure.
if [[ -z "${SUBNODE_HOSTNAME:-}" ]]; then
  echo "SUBNODE_HOSTNAME must be set" >&2
  exit 1
fi

# Bound the wait so that a server stuck in an intermediate state fails this
# script instead of blocking the job until its global timeout.
timeout=${VERIFY_REVERT_TIMEOUT:-300}

# Wait for the server to finish reverting resize
revert_start=$(date +%s)
while true; do
  status=$(openstack server show server-rr -f value -c status)
  task_state=$(openstack server show server-rr -f value -c OS-EXT-STS:task_state)

  # The revert is done once the server is ACTIVE with no task in progress; the
  # CLI may print an idle task_state either as "None" or as an empty string.
  if [[ "${status}" == "ACTIVE" && ( "${task_state}" == "None" || -z "${task_state}" ) ]]; then
    revert_end=$(date +%s)
    revert_duration=$((revert_end - revert_start))
    echo "Revert resize completed in ${revert_duration} seconds."
    break
  fi

  if [[ "${status}" == "ERROR" ]]; then
    echo "Server went to ERROR status during revert resize"
    exit 3
  fi

  now=$(date +%s)
  if (( now - revert_start >= timeout )); then
    echo "Timed out after ${timeout} seconds waiting for revert resize of server-rr (last status: ${status}, task_state: ${task_state})"
    exit 5
  fi

  sleep 5
done

# Make sure the server moved back to the subnode.
host=$(openstack server show server-rr -f value -c OS-EXT-SRV-ATTR:host)
if [[ "${host}" != "${SUBNODE_HOSTNAME}" ]]; then
  echo "Unexpected host ${host} for server after revert resize during graceful shutdown."
  exit 4
fi

echo "Revert resize during graceful shutdown completed successfully"
echo "Server server-rr is ACTIVE on ${host}"
|
||||
@@ -100,7 +100,212 @@
|
||||
script: "cleanup_test_servers.sh server-cm1"
|
||||
ignore_errors: true
|
||||
|
||||
- name: Graceful shutdown dest compute live migration
|
||||
block:
|
||||
- name: Start live migrations of test servers
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "start_live_migration.sh server-lm2"
|
||||
environment:
|
||||
SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}"
|
||||
CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"
|
||||
register: start_live_migrations_result_dest
|
||||
failed_when: start_live_migrations_result_dest.rc not in [0, 2]
|
||||
|
||||
- name: Set fact if migrations completed or timed out before SIGTERM to dest compute
|
||||
set_fact:
|
||||
live_migrations_completed_or_timeout_dest: "{{ start_live_migrations_result_dest.rc == 2 }}"
|
||||
|
||||
- name: Run graceful shutdown tests
|
||||
when: not live_migrations_completed_or_timeout_dest
|
||||
block:
|
||||
- name: Send SIGTERM to dest compute to start the dest compute graceful shutdown
|
||||
delegate_to: controller
|
||||
become: true
|
||||
shell: "kill -15 $(systemctl show devstack@n-cpu -p MainPID --value)"
|
||||
|
||||
- name: Verify live migration is completed during graceful shutdown
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "verify_live_migration.sh server-lm2"
|
||||
environment:
|
||||
CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"
|
||||
|
||||
# Sleep for 180 sec: default graceful_shutdown_timeout
|
||||
- name: Sleep for 180 seconds to allow dest compute graceful shutdown to complete
|
||||
pause:
|
||||
seconds: 180
|
||||
|
||||
- name: Verify dest compute service is stopped after graceful shutdown
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "start_and_verify_compute_service.sh {{ hostvars['controller']['ansible_hostname'] }} inactive"
|
||||
|
||||
- name: Start and verify dest compute service is running
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "start_and_verify_compute_service.sh {{ hostvars['controller']['ansible_hostname'] }}"
|
||||
|
||||
- name: Cleanup test servers
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "cleanup_test_servers.sh server-lm2"
|
||||
ignore_errors: true
|
||||
|
||||
- name: Graceful shutdown dest compute cold migration
|
||||
block:
|
||||
- name: Start cold migrations of test servers
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "start_cold_migration.sh server-cm2"
|
||||
environment:
|
||||
SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}"
|
||||
CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"
|
||||
register: start_cold_migrations_result_dest
|
||||
failed_when: start_cold_migrations_result_dest.rc not in [0, 2]
|
||||
|
||||
- name: Set fact if migrations completed or timed out before SIGTERM to dest compute
|
||||
set_fact:
|
||||
cold_migrations_completed_or_timeout_dest: "{{ start_cold_migrations_result_dest.rc == 2 }}"
|
||||
|
||||
- name: Run graceful shutdown tests
|
||||
when: not cold_migrations_completed_or_timeout_dest
|
||||
block:
|
||||
- name: Send SIGTERM to dest compute to start the dest compute graceful shutdown
|
||||
delegate_to: controller
|
||||
become: true
|
||||
shell: "kill -15 $(systemctl show devstack@n-cpu -p MainPID --value)"
|
||||
|
||||
- name: Verify cold migration is completed during graceful shutdown
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "verify_cold_migration.sh server-cm2"
|
||||
|
||||
# Sleep for 180 sec: default graceful_shutdown_timeout
|
||||
- name: Sleep for 180 seconds to allow dest compute graceful shutdown to complete
|
||||
pause:
|
||||
seconds: 180
|
||||
|
||||
- name: Verify dest compute service is stopped after graceful shutdown
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "start_and_verify_compute_service.sh {{ hostvars['controller']['ansible_hostname'] }} inactive"
|
||||
|
||||
- name: Start and verify dest compute service is running
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "start_and_verify_compute_service.sh {{ hostvars['controller']['ansible_hostname'] }}"
|
||||
|
||||
- name: Cleanup test servers
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "cleanup_test_servers.sh server-cm2"
|
||||
ignore_errors: true
|
||||
|
||||
- name: Graceful shutdown while building instance
|
||||
block:
|
||||
- name: Build instance on subnode
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "build_instance.sh"
|
||||
environment:
|
||||
SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}"
|
||||
register: build_instance_result
|
||||
failed_when: build_instance_result.rc not in [0, 2]
|
||||
|
||||
- name: Set fact if build completed before SIGTERM
|
||||
set_fact:
|
||||
build_completed_or_error: "{{ build_instance_result.rc == 2 }}"
|
||||
|
||||
- name: Run graceful shutdown tests
|
||||
when: not build_completed_or_error
|
||||
block:
|
||||
- name: Send SIGTERM to subnode compute service
|
||||
delegate_to: compute1
|
||||
become: true
|
||||
shell: "kill -15 $(systemctl show devstack@n-cpu -p MainPID --value)"
|
||||
|
||||
- name: Verify build instance is completed and it is in active state
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "verify_build_instance.sh"
|
||||
|
||||
# Sleep for 180 sec: default graceful_shutdown_timeout
|
||||
- name: Sleep for 180 seconds to allow graceful shutdown to complete
|
||||
pause:
|
||||
seconds: 180
|
||||
|
||||
- name: Verify subnode compute service is stopped after graceful shutdown
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "start_and_verify_compute_service.sh {{ hostvars['compute1']['ansible_hostname'] }} inactive"
|
||||
|
||||
- name: Verify subnode compute service is running
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "start_and_verify_compute_service.sh {{ hostvars['compute1']['ansible_hostname'] }}"
|
||||
|
||||
- name: Cleanup test servers
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "cleanup_test_servers.sh server-build"
|
||||
ignore_errors: true
|
||||
|
||||
- name: Graceful shutdown revert resize
|
||||
block:
|
||||
- name: Start revert resize of test server
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "start_revert_resize.sh"
|
||||
environment:
|
||||
SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}"
|
||||
CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}"
|
||||
register: start_revert_resize_result
|
||||
failed_when: start_revert_resize_result.rc not in [0, 2]
|
||||
|
||||
- name: Set fact if revert resize completed before SIGTERM
|
||||
set_fact:
|
||||
revert_resize_not_done: "{{ start_revert_resize_result.rc == 2 }}"
|
||||
|
||||
- name: Run graceful shutdown tests
|
||||
when: not revert_resize_not_done
|
||||
block:
|
||||
- name: Send SIGTERM to controller during revert resize
|
||||
delegate_to: controller
|
||||
become: true
|
||||
shell: "kill -15 $(systemctl show devstack@n-cpu -p MainPID --value)"
|
||||
|
||||
- name: Verify revert resize is completed during graceful shutdown
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "verify_revert_resize.sh"
|
||||
environment:
|
||||
SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}"
|
||||
|
||||
# Sleep for 180 sec: default graceful_shutdown_timeout
|
||||
- name: Sleep for 180 seconds to allow graceful shutdown to complete
|
||||
pause:
|
||||
seconds: 180
|
||||
|
||||
- name: Verify dest compute service is stopped after graceful shutdown
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "start_and_verify_compute_service.sh {{ hostvars['controller']['ansible_hostname'] }} inactive"
|
||||
|
||||
- name: Start and verify source compute service is running
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "start_and_verify_compute_service.sh {{ hostvars['controller']['ansible_hostname'] }}"
|
||||
|
||||
- name: Cleanup test servers
|
||||
become: true
|
||||
become_user: stack
|
||||
script: "cleanup_test_servers.sh server-rr"
|
||||
ignore_errors: true
|
||||
|
||||
- name: Fail if any test is skipped
|
||||
fail:
|
||||
msg: "One or more test is skipped due to operation is either completed or timed out before SIGTERM signal."
|
||||
when: live_migrations_completed_or_timeout or cold_migrations_completed_or_timeout
|
||||
when: live_migrations_completed_or_timeout or cold_migrations_completed_or_timeout or
|
||||
live_migrations_completed_or_timeout_dest or cold_migrations_completed_or_timeout_dest or
|
||||
build_completed_or_error or revert_resize_not_done
|
||||
|
||||
Reference in New Issue
Block a user