Add OpenShift verification role

Add an Ansible 'tests' role and a verify-shiftstack.yaml playbook that
verify OCP cluster health: OpenStack instances, MachineSets, nodes,
pods (including the registry and router pods), load balancer listeners,
ClusterOperators, and connectivity to a demo application.
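
The checks run on the undercloud host. A hypothetical manual
invocation (the functional test normally drives it through
tripleo.run_playbook_from_undercloud) could look like:

    ansible-playbook verify-shiftstack.yaml \
        -e user_cloud=shiftstack \
        -e user_env_file=/home/stack/shiftstackrc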

Co-Authored-By: Federico Ressi <fressi@redhat.com>
Change-Id: I3aeab9fb1c0d19d08db05a773251d14f46a25eaf
Itay Matza 2022-05-10 10:39:23 +03:00 committed by Federico Ressi
parent e2022af3bc
commit af20a2f12b
15 changed files with 403 additions and 28 deletions

View File: debug-vars.yaml

@@ -1,8 +0,0 @@
---
- hosts: localhost
  vars_files:
    - vars/some-vars.yaml
  tasks:
    - debug: var=var1
    - debug: var=var2

View File: ping-shiftstack.yaml

@@ -1,5 +0,0 @@
---
- hosts: all
  roles:
    - ping

View File: requirements.yaml

@@ -0,0 +1,5 @@
---
collections:
  - name: openstack.cloud
  - name: kubernetes.core

View File: roles/ping/tasks/main.yml

@@ -1,2 +0,0 @@
- name: 'Ping hosts'
  ping:

View File: files/demo.yml

@@ -0,0 +1,33 @@
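# A demo Deployment (3 replicas of the kuryr demo app, assumed to serve
# HTTP on port 8080) and a ClusterIP Service exposing it on port 80;
# check_demo_app.yml curls the Service IP from one of the pods.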
apiVersion: apps/v1
kind: Deployment
metadata:
  name: demo
spec:
  replicas: 3
  selector:
    matchLabels:
      app: demo
  template:
    metadata:
      labels:
        app: demo
    spec:
      containers:
        - name: demo
          image: quay.io/kuryr/demo
          ports:
            - containerPort: 8080
---
apiVersion: v1
kind: Service
metadata:
  name: demo
  labels:
    app: demo
spec:
  selector:
    app: demo
  ports:
    - port: 80
      protocol: TCP
      targetPort: 8080

View File: scripts/pods_ready.sh

@@ -0,0 +1,41 @@
#!/bin/bash
#set -x

# Wait until no build pods are left in Running state in any namespace.
function wait_for_build_completion()
{
    running=$(oc get pods --all-namespaces | grep build | grep Running | wc -l)
    while [ "$running" -ne 0 ]; do
        sleep 5
        running=$(oc get pods --all-namespaces | grep build | grep Running | wc -l)
        echo "$running builds are still running"
    done
}

# Wait until no deployer pods are left in Running state in any namespace.
function wait_for_deployment_completion()
{
    running=$(oc get pods --all-namespaces | grep deploy | grep Running | wc -l)
    while [ "$running" -ne 0 ]; do
        sleep 5
        running=$(oc get pods --all-namespaces | grep deploy | grep Running | wc -l)
        echo "$running deployments are still running"
    done
}

# Fail if any pod is in Error state, printing its description for debugging.
function check_no_error_pods()
{
    error=$(oc get pods --all-namespaces | grep Error | wc -l)
    if [ "$error" -ne 0 ]; then
        echo -e "Found pods in error state:\n$(oc get pods --all-namespaces | grep Error)"
        echo "details:"
        oc get pods --all-namespaces | grep Error | awk '{print "oc describe pod -n " $1" "$2}' | sh
        echo "$error pods in error state found, exiting"
        exit 1
    fi
}

wait_for_build_completion
wait_for_deployment_completion
check_no_error_pods

View File: tasks/check_demo_app.yml

@@ -0,0 +1,51 @@
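# Re-create the "demo" project, deploy the demo app and its service,
# wait for 3 running pods, then curl the service from the first pod.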
- name: Re-create the demo project
  import_tasks: recreate_ocp_project.yml
  vars:
    project_name: demo

- name: Run the demo application
  vars:
    ansible_python_interpreter: "{{ k8s_venv_python_interpreter }}"
  kubernetes.core.k8s:
    state: present
    definition: "{{ lookup('file', '../files/demo.yml') }}"
    namespace: demo

- name: Wait for 3 Pods labelled app=demo
  vars:
    ansible_python_interpreter: "{{ k8s_venv_python_interpreter }}"
  kubernetes.core.k8s_info:
    kind: Pod
    namespace: demo
    label_selectors:
      - app = demo
    field_selectors:
      - status.phase=Running
  register: demo_pods
  retries: 15
  delay: 15
  until: demo_pods.resources|length == 3

- name: Get the first pod
  set_fact:
    first_pod_name: "{{ demo_pods | json_query('resources[*].metadata.name') | first }}"

- name: Get the demo service
  vars:
    ansible_python_interpreter: "{{ k8s_venv_python_interpreter }}"
  kubernetes.core.k8s_info:
    kind: Service
    name: demo
    namespace: demo
  register: demo_svc

- name: Get the demo service IP
  set_fact:
    demo_svc_ip: "{{ demo_svc | json_query('resources[0].spec.clusterIP') }}"

- name: Check connectivity between the first pod and the demo service
  shell: "oc exec -t {{ first_pod_name }} -n demo -- curl {{ demo_svc_ip }}"
  register: pod_result
  retries: 30
  delay: 10
  until: pod_result.stdout is search('HELLO! I AM ALIVE!!!')

View File: tasks/debug_machines.yml

@@ -0,0 +1,19 @@
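# Debug helper included from the rescue blocks in verification.yml:
# dump MachineSet, Machine, node and OpenStack instance status, then fail.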
- name: Get Instances, Nodes, Machines, and MachineSets information
  block:
    - name: Get OCP machinesets status
      shell: 'oc get machinesets -A'

    - name: Get OCP machines status
      shell: 'oc get machines -A'

    - name: Get OCP nodes status
      shell: 'oc get nodes'

    - name: Get the project's instances status
      shell: |
        source {{ user_env_file }}
        openstack server list

- name: Fail inside rescue block
  fail:
    msg: "The MachineSets health check failed. See the output of the commands above for more information."

View File: tasks/recreate_ocp_project.yml

@@ -0,0 +1,40 @@
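# Delete the {{ project_name }} project if it exists, wait until it is
# gone, then create it from scratch.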
- name: Check if {{ project_name }} project exists
  vars:
    ansible_python_interpreter: "{{ k8s_venv_python_interpreter }}"
  kubernetes.core.k8s_info:
    kind: Project
    name: "{{ project_name }}"
    api_version: project.openshift.io/v1
  register: oc_project

- name: Delete the {{ project_name }} project
  vars:
    ansible_python_interpreter: "{{ k8s_venv_python_interpreter }}"
  kubernetes.core.k8s:
    state: absent
    api_version: project.openshift.io/v1
    kind: Project
    name: "{{ project_name }}"
  when:
    - oc_project.resources

- name: Wait for the {{ project_name }} project to be deleted
  vars:
    ansible_python_interpreter: "{{ k8s_venv_python_interpreter }}"
  kubernetes.core.k8s_info:
    kind: Project
    name: "{{ project_name }}"
    api_version: project.openshift.io/v1
  register: oc_project
  retries: 30
  delay: 10
  until: oc_project.resources|length == 0

- name: Create the {{ project_name }} project
  vars:
    ansible_python_interpreter: "{{ k8s_venv_python_interpreter }}"
  kubernetes.core.k8s:
    state: present
    api_version: project.openshift.io/v1
    kind: Project
    name: "{{ project_name }}"

View File: tasks/validate_node.yml

@@ -0,0 +1,11 @@
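# A healthy node reports the Ready condition as True and every other
# condition (e.g. MemoryPressure, DiskPressure) as False.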
- name: Fail if the node is not Ready
  fail:
    msg: "{{ item.message }}"
  when: item.type == 'Ready' and item.status == 'False'
  with_items: "{{ oc_node.status.conditions }}"

- name: Fail if any other node condition is raised
  fail:
    msg: "{{ item.message }}"
  when: item.type != 'Ready' and item.status == 'True'
  with_items: "{{ oc_node.status.conditions }}"

View File: tasks/verification.yml

@@ -0,0 +1,177 @@
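# End-to-end OCP cluster health verification: OpenShift API, OpenStack
# instances, MachineSets, nodes, pods (including registry and router),
# load balancer listeners, ClusterOperators and a demo application.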
- name: Create a symlink to the kubeconfig on the installer host
  block:
    - name: Create the oc config directory .kube/ on the installer host
      file:
        path: ~/.kube
        state: directory

    - name: Create a symlink in the .kube/ dir to the kubeconfig file on the installer host
      file:
        src: ~/ostest/auth/kubeconfig
        dest: ~/.kube/config
        state: link

- name: Get the network type
  shell: oc get network cluster -o json | jq -r .spec.networkType
  register: oc_network

- name: Set the network type
  set_fact:
    ocp_network_type: "{{ oc_network.stdout }}"

- name: Check the OpenShift API status
  shell: 'oc status'
  retries: 10
  delay: 10
  until: oc_status.rc == 0
  register: oc_status

- name: Build/use the virtualenv for verification
  pip:
    name: "{{ k8s_venv_pip_package_list }}"
    state: present
    virtualenv: "{{ k8s_venv_path }}"
  register: _install_venv_pip_packages
  retries: 5
  delay: 10
  until: _install_venv_pip_packages is success

- name: Discover the running OCP release
  command: oc get clusterversion
  register: oc_get_clusterversion

- set_fact:
    discovered_openshift_release: "{{ oc_get_clusterversion.stdout_lines[-1] | regex_replace('version +([0-9]+\\.[0-9]+).*$', '\\1') }}"

- name: Prepare the setup for OSP < 16
  package:
    name:
      - python3
      - libselinux-python3
    state: present
  become: true
  when: openstack_version|default('16')|int < 16

- name: Check that the project's instances are healthy
  block:
    - name: Get information about the "{{ user_cloud }}" project servers
      vars:
        ansible_python_interpreter: "{{ k8s_venv_python_interpreter }}"
      openstack.cloud.server_info:
        cloud: "{{ user_cloud }}"
      register: project_servers

    - name: Fail if not all the project servers are active
      vars:
        error_msg: >
          Failed! The {{ item.name }} server in the {{ user_cloud }} project
          is in {{ item.vm_state }} state.
      fail:
        msg: "{{ error_msg }}"
      when: item.vm_state != "active"
      loop: "{{ project_servers.openstack_servers }}"
  rescue:
    - name: Get Instances, Machines, and MachineSets information for the unhealthy project instances
      include_tasks: debug_machines.yml

- name: Check that the MachineSets are healthy
  block:
    - name: Get the MachineSets status
      vars:
        ansible_python_interpreter: "{{ k8s_venv_python_interpreter }}"
      kubernetes.core.k8s_info:
        api_version: machine.openshift.io/v1beta1
        kind: MachineSet
      register: oc_machines

    - name: Fail if the number of desired OCP machines is not equal to the available machines
      vars:
        error_msg: >
          Failed! The replicas value of MachineSet {{ item.metadata.name }}
          does not match the MachineSet available replicas status.
      fail:
        msg: "{{ error_msg }}"
      when: item.status.availableReplicas is not defined or
            item.status.replicas != item.status.availableReplicas
      loop: "{{ oc_machines.resources }}"
  rescue:
    - name: Get Instances, Machines, and MachineSets information for the unhealthy MachineSets
      include_tasks: debug_machines.yml

- name: Get the OpenShift nodes
  shell: 'oc get nodes -o json'
  retries: 10
  delay: 10
  no_log: true
  until: oc_nodes.rc == 0
  register: oc_nodes

- set_fact:
    oc_get_nodes_result: "{{ oc_nodes.stdout | from_json }}"

- name: Validate node statuses
  include_tasks: "validate_node.yml"
  with_items: "{{ oc_get_nodes_result['items'] }}"
  loop_control:
    loop_var: oc_node

- name: Wait for pods to be ready
  script: ../scripts/pods_ready.sh
  ignore_errors: yes
  register: pods_ready

- name: Get the pods statuses
  shell: 'oc get pods --all-namespaces -o json'
  no_log: true
  register: oc_pods

- set_fact:
    oc_get_pods_result: "{{ oc_pods.stdout | from_json }}"
  no_log: true

- name: Check we have at least one registry pod
  vars:
    query: "items[?starts_with(metadata.name, 'image-registry')]"
  fail:
    msg: "There should be at least one image-registry pod"
  # the to_json | from_json workaround avoids a known Ansible issue:
  # https://github.com/ansible/ansible/issues/20379#issuecomment-284034650
  when: oc_get_pods_result | to_json | from_json | json_query(query) | length == 0

- name: Check we have at least one router pod
  vars:
    query: "items[?starts_with(metadata.name, 'router')]"
  fail:
    msg: "There should be at least one router pod"
  when: oc_get_pods_result | to_json | from_json | json_query(query) | length == 0

- name: Get the disabled load balancer listeners
  shell: |-
    source {{ user_env_file }}
    openstack loadbalancer listener list --disable -f value
  register: osp_lbs_listeners

- name: Fail when there are disabled load balancer listeners
  fail:
    msg: |
      The following load balancer listeners are in disabled state:
      {{ osp_lbs_listeners.stdout }}
  when: osp_lbs_listeners.stdout_lines|length > 0

- name: Actively wait until all the ClusterOperators are ready
  vars:
    ansible_python_interpreter: "{{ k8s_venv_python_interpreter }}"
  kubernetes.core.k8s_info:
    api_version: config.openshift.io/v1
    kind: ClusterOperator
  register: cluster_operators
  until:
    - cluster_operators is not failed
    - cluster_operators.resources is defined
    - cluster_operators | json_query('resources[*].status.conditions[?type==`Available`].status') | unique == [["True"]]
    - cluster_operators | json_query('resources[*].status.conditions[?type==`Progressing`].status') | unique == [["False"]]
    - cluster_operators | json_query('resources[*].status.conditions[?type==`Degraded`].status') | unique == [["False"]]
  retries: 20
  delay: 30

- name: Create a demo app and check connectivity
  include_tasks: check_demo_app.yml

View File: defaults/main.yml

@@ -0,0 +1,11 @@
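# Role defaults; override them to match the target environment.
# The pinned packages below are installed into the verification virtualenv.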
user_cloud: "shiftstack"
user_env_file: "/home/stack/shiftstackrc"
k8s_venv_path: "/var/tmp/venv_k8s"
k8s_venv_python_interpreter: "{{ k8s_venv_path }}/bin/python"
k8s_venv_pip_package_list:
  - openstacksdk
  - setuptools==59.6.0
  - openshift
  - kubernetes==22.6.0
  - kubernetes-validate
  - selinux

View File: vars/some-vars.yaml

@@ -1,4 +0,0 @@
---
var1: value1
var2: value2

View File: verify-shiftstack.yaml

@@ -0,0 +1,7 @@
---
- hosts: undercloud
  tasks:
    - name: Run OCP verification task
      import_role:
        name: tests
        tasks_from: verification.yml

View File

@@ -17,6 +17,7 @@ import os
 import testtools
+from tobiko import shiftstack
 from tobiko import tripleo
@@ -24,16 +25,14 @@ PLAYBOOK_DIRNAME = os.path.join(os.path.dirname(__file__), 'playbooks')

 @tripleo.skip_if_missing_tripleo_ansible_inventory
+@shiftstack.skip_unless_has_shiftstack()
 class OpenShiftTest(testtools.TestCase):

-    def test_ping_all_hosts(self):
+    def test_ocp_cluster(self):
+        clouds_file_path = shiftstack.get_clouds_file_path()
         tripleo.run_playbook_from_undercloud(
-            playbook='ping-shiftstack.yaml',
+            playbook='verify-shiftstack.yaml',
             playbook_dirname=PLAYBOOK_DIRNAME,
-            roles=['ping'])
-
-    def test_debug_vars(self):
-        tripleo.run_playbook_from_undercloud(
-            playbook='debug-vars.yaml',
-            playbook_dirname=PLAYBOOK_DIRNAME,
-            playbook_files=['vars/some-vars.yaml'])
+            playbook_files=[clouds_file_path],
+            requirements_files=['requirements.yaml'],
+            roles=['tests'])