openstack-ansible/playbooks/healthcheck-infrastructure.yml
Jonathan Rosser dc1f76c823 Remove support for calico ml2 driver.
The deployment of etcd as a service on the control plane is no
longer needed as calico was the only user of this.

The etcd-server role remains in our requirements as it is used
internally as part of the Zun playbook.

Change-Id: I2a158fd2b85ec0e637071ed4ef7c123a6583ecc0
2023-02-23 12:13:55 +01:00

400 lines
13 KiB
YAML

---
# Copyright 2017, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This playbook is meant to run after setup-infrastructure, and expects
# the infrastructure bits to have properly deployed to succeed.
# Test unbound-install.yml
# TO BE IMPLEMENTED
# Test repo-install.yml
- name: Ensure all repo-servers are built and are accessible by hosts.
hosts: all_containers[0]:physical_hosts[0]
gather_facts: yes
vars:
repo_requirements_file: "constraints/upper_constraints_cached.txt"
tasks:
- name: Check the repo sync file on each repo server
uri:
url: "http://{{ hostvars[item]['container_address'] }}:{{ repo_server_port }}/{{ repo_requirements_file }}"
with_inventory_hostnames: "{{ groups['repo_all'] }}"
when: install_method == 'source'
tags:
- healthcheck
- healthcheck-repo-install
# Test haproxy-install.yml
- name: Ensuring haproxy runs
hosts: haproxy
gather_facts: yes
tasks:
- name: Check if host can connect to external keepalived ping IP
command: "ping -c 2 {{ keepalived_external_ping_address }}"
changed_when: false
- name: Check if host can connect to internal keepalived ping IP
command: "ping -c 2 {{ keepalived_internal_ping_address }}"
changed_when: false
- name: Checking if keepalived is running
command: "pgrep keepalived"
changed_when: false
when: groups['haproxy'] | length > 1
- package:
name: "{% if ansible_facts['os_family'] | lower == 'redhat' %}nmap-ncat{% else %}netcat-openbsd{% endif %}"
state: present
# Fails if HAProxy is not running
- name: Recording haproxy stats as a way to ensure haproxy runs
shell: 'echo "show info;show stat" | nc -U /var/run/haproxy.stat'
changed_when: false
register: haproxy_stats
# Run this playbook with -v and you'll see your DOWN issues
- name: Printing the output of haproxy stats
debug:
var: haproxy_stats
verbosity: 1
tags:
- healthcheck
- healthcheck-haproxy-install
# Test repo-use.yml
- name: Ensure all the containers can connect to the repos
hosts: all_containers
gather_facts: yes
# By having serial, you ensure that the first three containers are hitting
# the load balancer at the same time, which would then cause hitting three
# different repos servers.
# When this is done, the rest can be done with all the nodes at the same time.
serial:
- 3
- 100%
vars_files:
- defaults/source_install.yml
tasks:
# Repo release path points to the internal LB vip
- name: Check the presence of upper constraints on your repos and check load balancing
uri:
url: "{{ openstack_repo_url }}/constraints/upper_constraints_cached.txt"
tags:
- healthcheck
- healthcheck-repo-use
# Test utility-install.yml
- name: Ensure the service setup host is ready to run openstack calls
hosts: "{{ openstack_service_setup_host | default('localhost') }}"
gather_facts: no
vars_files:
- "defaults/{{ install_method }}_install.yml"
vars:
ansible_python_interpreter: "{{ openstack_service_setup_host_python_interpreter | default(ansible_facts['python']['executable']) }}"
tasks:
- name: Get openstack client config
openstack.cloud.config:
- name: Show openstack client config
debug:
var: openstack.clouds
verbosity: 1
tags:
- healthcheck
- healthcheck-utility-install
# Test memcached-install.yml
- name: Check memcached for keystone
hosts: keystone_all
gather_facts: no
tasks:
- name: Set facts about memcached
setup:
delegate_to: "{{ item }}"
delegate_facts: true
with_items: "{{ groups['memcached'] }}"
- package:
name: "{% if ansible_facts['os_family'] | lower == 'redhat' %}nmap-ncat{% else %}netcat-openbsd{% endif %}"
state: present
- name: Connect to remote memcache servers (full mesh testing)
shell: "echo stats | nc -w 3 {{ hostvars[memcached_host]['container_address'] }} {{ memcached_port }}"
changed_when: false
register: memcache_stats
with_items: "{{ groups['memcached'] }}"
loop_control:
loop_var: memcached_host
- name: Output memcache stats if in verbose mode
debug:
var: memcache_stats
verbosity: 1
tags:
- healthcheck
- healthcheck-memcached-install
# Test galera-install.yml
- name: Sanity checks for all containers
hosts: all_containers:physical_hosts
gather_facts: no
tasks:
- name: Connect to galera port
wait_for:
port: 3306
host: "{{ internal_lb_vip_address }}"
state: started
tags:
- healthcheck
- healthcheck-galera-install
- name: Run functional tests
hosts: galera_all
user: root
gather_facts: true
tasks:
- name: Wait for cluster to be ready
block:
- name: Wait for cluster ready state
command: |
mysql -h {{ ansible_host }} \
-u "{{ galera_root_user | default('root') }}" \
-p"{{ galera_root_password }}" \
-e "show status like 'wsrep_incoming_addresses';" \
--silent \
--skip-column-names
register: mysql_instance_ready
retries: 20
delay: 5
changed_when: false
until: mysql_instance_ready is success and mysql_instance_ready.stdout.split()[-1].split(',') | length == groups['galera_all'] | length
rescue:
- name: Restarting weird maria instance
service:
name: mariadb
state: restarted
- name: Wait for cluster ready state
command: |
mysql -h {{ ansible_host }} \
-u "{{ galera_root_user | default('root') }}" \
-p"{{ galera_root_password }}" \
-e "show status like 'wsrep_incoming_addresses';" \
--silent \
--skip-column-names
register: mysql_instance_ready
retries: 20
delay: 5
changed_when: false
until: mysql_instance_ready is success and mysql_instance_ready.stdout.split()[-1].split(',') | length == groups['galera_all'] | length
- name: Check cluster local state
command: |
mysql -h {{ ansible_host }} \
-u "{{ galera_root_user | default('root') }}" \
-p"{{ galera_root_password }}" \
-e "show status like 'wsrep_local_state_comment';" \
--silent \
--skip-column-names
register: wsrep_local_state_comment
changed_when: false
tags:
- skip_ansible_lint
- name: Check cluster evs state
command: |
mysql -h {{ ansible_host }} \
-u "{{ galera_root_user | default('root') }}" \
-p"{{ galera_root_password }}" \
-e "show status like 'wsrep_evs_state';" \
--silent \
--skip-column-names
register: wsrep_evs_state
changed_when: false
tags:
- skip_ansible_lint
- name: Check contents
assert:
that:
- "'Synced' in wsrep_local_state_comment.stdout"
- "'OPERATIONAL' in wsrep_evs_state.stdout"
- name: Create DB for service on "{{ groups['galera_all'][0] }}"
community.mysql.mysql_db:
login_user: "{{ galera_root_user | default('root') }}"
login_password: "{{ galera_root_password }}"
login_host: "{{ ansible_host }}"
name: "OSA-test"
state: "present"
when: inventory_hostname == groups['galera_all'][0]
tags:
- skip_ansible_lint
- name: Grant access to the DB on "{{ groups['galera_all'][-1] }}"
community.mysql.mysql_user:
login_user: "{{ galera_root_user | default('root') }}"
login_password: "{{ galera_root_password }}" # noqa no-log-password
login_host: "{{ ansible_host }}"
name: "osa-tester"
password: "tester-secrete" # noqa no-log-password
host: "{{ item }}"
state: "present"
priv: "OSA-test.*:ALL"
with_items:
- "localhost"
- "%"
when: inventory_hostname == groups['galera_all'][-1]
- name: Try to login with user to DB
delegate_to: "{{ groups['utility_all'][0] }}"
command: |
mysql -h {{ internal_lb_vip_address }} \
-p"tester-secrete" \
-u osa-tester \
OSA-test \
-e "SHOW TABLES;"
when: inventory_hostname == groups['galera_all'][-1]
- name: Remove created user
community.mysql.mysql_user:
login_user: "{{ galera_root_user | default('root') }}"
login_password: "{{ galera_root_password }}" # noqa no-log-password
login_host: "{{ ansible_host }}"
name: "osa-tester"
state: "absent"
host: "{{ item }}"
with_items:
- "localhost"
- "%"
when: inventory_hostname == groups['galera_all'][-1]
- name: Remove created DB
community.mysql.mysql_db:
login_user: "{{ galera_root_user | default('root') }}"
login_password: "{{ galera_root_password }}" # noqa no-log-password
login_host: "{{ ansible_host }}"
name: "OSA-test"
state: "absent"
when: inventory_hostname == groups['galera_all'][0]
tags:
- skip_ansible_lint
# Test rabbitmq-install.yml
- name: Add a user for rabbitmq
hosts: rabbitmq_all[0]
gather_facts: no
tasks:
- name: Configure Rabbitmq vhost
community.rabbitmq.rabbitmq_vhost:
name: "/testvhost"
state: "present"
- name: Configure Rabbitmq user
community.rabbitmq.rabbitmq_user:
user: "testguest"
password: "secrete" # noqa no-log-password
vhost: "/testvhost"
configure_priv: ".*"
read_priv: ".*"
write_priv: ".*"
state: "present"
tags:
- healthcheck
- healthcheck-rabbitmq-install
- name: Ensure all the usual openstack containers can connect to rabbit
hosts: all_containers:!galera_all:!memcached:!haproxy:!rabbitmq_all:!unbound:!repo_all
gather_facts: no
vars:
venv_path: /tmp/rabbitmqtest
vars_files:
- "defaults/{{ install_method }}_install.yml"
post_tasks:
- name: Generate venv for rabbitmq testing
include_role:
name: "python_venv_build"
vars:
venv_install_destination_path: "{{ venv_path }}"
venv_pip_packages:
- pika
- name: Copying test script
copy:
src: "../scripts/rabbitmq-test.py"
dest: "{{ venv_path }}/rabbitmq-test.py"
mode: 0755
- name: Connect to rabbitmq
command: "{{ venv_path }}/bin/python {{ venv_path }}/rabbitmq-test.py {{ hostvars[groups['rabbitmq_all'][0]]['container_address'] }}"
changed_when: false
tags:
- healthcheck
- healthcheck-rabbitmq-install
- name: Remove guest user for rabbitmq
hosts: rabbitmq_all[0]
gather_facts: no
tasks:
- name: Remove test user
community.rabbitmq.rabbitmq_user:
user: testguest
password: secrete
vhost: "/testvhost"
state: absent
no_log: true
- name: Remove test vhost
community.rabbitmq.rabbitmq_vhost:
name: "/testvhost"
state: "absent"
tags:
- healthcheck
- healthcheck-rabbitmq-install
- healthcheck-teardown
# Test zookeeper-install
- name: Ensure coordination is running and accepting connections
hosts: utility_all[0]
tasks:
- name: Probing TCP connection to zookeeper
wait_for:
host: "{{ hostvars[item]['ansible_host'] }}"
port: "{{ coordination_port | default(2181) }}"
with_items: "{{ groups[coordination_host_group | default('zookeeper_all')] }}"
- name: Ensure zookeeper is healthy
hosts: "zookeeper_all"
tasks:
- name: Esuring netcat is installed
package:
name: "{% if ansible_facts['os_family'] | lower == 'redhat' %}nmap-ncat{% else %}netcat-openbsd{% endif %}"
state: present
- name: Gathering zookeeper state
shell: "echo ruok | nc localhost {{ coordination_port | default(2181) }}"
register: zookeeper_ok
changed_when: false
- name: Gathering zookeeper rw/ro
shell: "echo isro | nc localhost {{ coordination_port | default(2181) }}"
register: zookeeper_ro
changed_when: false
- name: Check zookeeper results
assert:
that:
- "'imok' in zookeeper_ok.stdout"
- "'rw' in zookeeper_ro.stdout"
# TODO: Other playbook's tests.