Julia Kreger 98c4c96c33 CI: Extend timeouts for RAX
It appears our jobs end up encountering some performance issues
on some RAX hosts where the overall runtime is already close to
the maximum for the job window.

Ultimately, the degraded performance delays the boot of the workload VM
just enough that we end up failing the job while the VM is still early
in its boot sequence.

Change-Id: I309220d7f61f87b7457668fcf586d4ea152d363b
2022-06-23 09:32:13 -07:00

122 lines
3.6 KiB
YAML

---
# Create "test-port" on the "private" network; skipped unless
# metalsmith_precreate_port is truthy.
- name: Create a port
  when: metalsmith_precreate_port
  command: >-
    openstack port create --network private test-port
# When the port was pre-created, describe the NIC by that port's name.
- name: Set port argument
  when: metalsmith_precreate_port
  set_fact:
    nic:
      port: test-port
# Without a pre-created port, describe the NIC by network name instead.
- name: Set network argument
  when: not metalsmith_precreate_port
  set_fact:
    nic:
      network: private
# Run the metalsmith_deployment role for a single instance named "test",
# passing the NIC definition held in the "nic" fact.
- name: Deploy a node
  vars:
    metalsmith_debug: true
    metalsmith_resource_class: baremetal
    metalsmith_instances:
      - hostname: test
        nics:
          - "{{ nic }}"
        ssh_public_keys:
          - "{{ ssh_key_file }}"
        # Falls back to an empty string when configure_instance_user is unset.
        user_name: "{{ configure_instance_user | default('') }}"
  include_role:
    name: metalsmith_deployment
# FIXME(dtantsur): openstacksdk issues a deprecation warning here, which
# somehow ends up in stdout, presumably because of Zuul quirks.
# Hack around it while we're solving the issue.
#
# stderr is diverted to a temporary file and replayed only when the command
# fails, so the registered stdout can be fed to from_json afterwards.
- name: Get instance info via CLI show
  shell: |
    errout="$(mktemp)"
    # Remove the scratch file on every exit path so runs do not leak it.
    trap 'rm -f "$errout"' EXIT
    if ! metalsmith --format=json show test 2> "$errout"; then
      cat "$errout"
      exit 1
    fi
  register: instance_info
  # Read-only query: never report this task as changed.
  changed_when: false
# Extract the "test" entry from the JSON registered in instance_info and fail
# unless both the instance and its node report the "active" state.
- name: Register instance information
  set_fact:
    instance: "{{ (instance_info.stdout | from_json).test }}"
  failed_when: >-
    instance.state != 'active' or
    instance.node.provision_state != 'active'
# FIXME(dtantsur): openstacksdk issues a deprecation warning here, which
# somehow ends up in stdout, presumably because of Zuul quirks.
# Hack around it while we're solving the issue.
#
# stderr is diverted to a temporary file and replayed only when the command
# fails, so the registered stdout can be fed to from_json afterwards.
- name: Get instance info via CLI list
  shell: |
    errout="$(mktemp)"
    # Remove the scratch file on every exit path so runs do not leak it.
    trap 'rm -f "$errout"' EXIT
    if ! metalsmith --format=json list 2> "$errout"; then
      cat "$errout"
      exit 1
    fi
  register: instance_info_via_list
  # Read-only query: never report this task as changed.
  changed_when: false
# Same check as for the "show" output, applied to the "test" entry of the
# JSON registered in instance_info_via_list.
- name: Verify that instance info via list is also correct
  set_fact:
    instance_via_list: "{{ (instance_info_via_list.stdout | from_json).test }}"
  failed_when: >-
    instance_via_list.state != 'active' or
    instance_via_list.node.provision_state != 'active'
# Display the bare metal node record referenced by the instance fact.
- name: Show active node information
  command: >-
    openstack baremetal node show {{ instance.node.id }}
# Take the first address of the first value in the instance's ip_addresses
# mapping; the task fails when the resulting fact is empty.
- name: Get IP address
  failed_when: not instance_ip
  set_fact:
    instance_ip: "{{ instance.ip_addresses.values() | list | first | first }}"
# Keep trying to SSH into the instance until one attempt succeeds
# (retries: 30, delay: 30; each attempt uses ConnectTimeout=10).
# The login user defaults to "cirros" when configure_instance_user is unset.
- name: SSH into the instance
  command: >
    ssh -v
    -o UserKnownHostsFile=/dev/null
    -o StrictHostKeyChecking=no
    -o ConnectTimeout=10
    {{ configure_instance_user | default('cirros') }}@{{ instance_ip }}
    "cat /etc/hostname"
  register: ssh_result
  retries: 30
  delay: 30
  until: ssh_result is success
# Tear down the "test" instance with debug output, passing --wait 900 so
# metalsmith waits for the undeploy instead of returning immediately.
- name: Undeploy a node
  command: >-
    metalsmith --debug undeploy --wait 900 test
# Fetch the node record as JSON after the undeploy for the checks that follow.
- name: Get the current status of the deployed node
  register: undeployed_node_result
  command: >-
    openstack baremetal node show {{ instance.node.id }} -f json
# Decode the JSON captured in undeployed_node_result into a fact.
- name: Parse node state
  set_fact:
    undeployed_node: >-
      {{ undeployed_node_result.stdout | from_json }}
# Abort unless the node's provision_state is "available" again.
- name: Check that the node was undeployed
  when: undeployed_node.provision_state != "available"
  fail:
    msg: The node is in unexpected status {{ undeployed_node }}
# Abort if the node's "extra" field is anything other than an empty mapping.
- name: Check that the node extra was cleared
  when: undeployed_node.extra != {}
  fail:
    msg: The node still has extra {{ undeployed_node }}
# List only the ID column of the VIFs attached to the node, as plain values.
- name: Get attached VIFs for the node
  register: vif_list_output
  command: >-
    openstack baremetal node vif list {{ instance.node.id }} -f value -c ID
# Any output from the VIF listing means something is still attached.
- name: Check that no VIFs are still attached
  when: vif_list_output.stdout != ""
  fail:
    msg: Some VIFs are still attached
# List the ports that exist at this point; the output is not registered.
- name: Show remaining ports
  command: >-
    openstack port list
# Remove "test-port" when it was pre-created; a failure here is tolerated.
- name: Delete created port
  when: metalsmith_precreate_port
  # FIXME(dtantsur): fails because of ironic mis-behavior
  ignore_errors: true
  command: >-
    openstack port delete test-port