Add retries to ovn_overcloud_processes_validations
This patch also includes a small refactoring of OvercloudProcessesStatus.basic_overcloud_processes_running and OvercloudProcessesStatus.ovn_overcloud_processes_validations methods. Change-Id: I0761afdb6e8292ebc70ba479a8eaab84353a25b4
This commit is contained in:
parent
7733488f00
commit
459a954ce1
|
@ -2,7 +2,6 @@ from __future__ import absolute_import
|
||||||
|
|
||||||
import io
|
import io
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
from oslo_log import log
|
from oslo_log import log
|
||||||
import pandas
|
import pandas
|
||||||
|
@ -123,64 +122,110 @@ class OvercloudProcessesStatus(object):
|
||||||
self.oc_procs_df = overcloud.get_overcloud_nodes_dataframe(
|
self.oc_procs_df = overcloud.get_overcloud_nodes_dataframe(
|
||||||
get_overcloud_node_processes_table)
|
get_overcloud_node_processes_table)
|
||||||
|
|
||||||
|
def _basic_overcloud_process_running(self, process_name):
|
||||||
|
# osp16/python3 process is "neutron-server:"
|
||||||
|
if process_name == 'neutron-server' and \
|
||||||
|
self.oc_procs_df.query('PROCESS=="{}"'.format(
|
||||||
|
process_name)).empty:
|
||||||
|
process_name = 'neutron-server:'
|
||||||
|
# osp17 mysqld process name is mysqld_safe
|
||||||
|
if process_name == 'mysqld' and \
|
||||||
|
self.oc_procs_df.query('PROCESS=="{}"'.format(
|
||||||
|
process_name)).empty:
|
||||||
|
process_name = 'mysqld_safe'
|
||||||
|
# redis not deployed on osp17 by default, only if some
|
||||||
|
# other services such as designate and octavia are deployed
|
||||||
|
if (process_name == 'redis-server' and
|
||||||
|
not overcloud.is_redis_expected()):
|
||||||
|
redis_message = ("redis-server not expected on OSP 17 "
|
||||||
|
"and later releases by default")
|
||||||
|
if self.oc_procs_df.query(
|
||||||
|
f'PROCESS=="{process_name}"').empty:
|
||||||
|
LOG.info(redis_message)
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
raise OvercloudProcessesException(
|
||||||
|
process_error=redis_message)
|
||||||
|
|
||||||
|
if not self.oc_procs_df.query('PROCESS=="{}"'.format(
|
||||||
|
process_name)).empty:
|
||||||
|
LOG.info("overcloud processes status checks: "
|
||||||
|
"process {} is "
|
||||||
|
"in running state".format(process_name))
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
LOG.info("Failure : overcloud processes status checks:"
|
||||||
|
"process {} is not running ".format(
|
||||||
|
process_name))
|
||||||
|
raise OvercloudProcessesException(
|
||||||
|
process_error="process {} is not running ".format(
|
||||||
|
process_name))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def basic_overcloud_processes_running(self):
|
def basic_overcloud_processes_running(self):
|
||||||
"""
|
"""
|
||||||
Checks that the oc_procs_df dataframe has all of the list procs
|
Checks that the oc_procs_df dataframe has all of the list procs
|
||||||
:return: Bool
|
:return: Bool
|
||||||
"""
|
"""
|
||||||
for attempt_number in range(600):
|
for attempt in tobiko.retry(timeout=300., interval=1.):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
for process_name in self.processes_to_check:
|
for process_name in self.processes_to_check:
|
||||||
# osp16/python3 process is "neutron-server:"
|
self._basic_overcloud_process_running(process_name)
|
||||||
if process_name == 'neutron-server' and \
|
|
||||||
self.oc_procs_df.query('PROCESS=="{}"'.format(
|
|
||||||
process_name)).empty:
|
|
||||||
process_name = 'neutron-server:'
|
|
||||||
# osp17 mysqld process name is mysqld_safe
|
|
||||||
if process_name == 'mysqld' and \
|
|
||||||
self.oc_procs_df.query('PROCESS=="{}"'.format(
|
|
||||||
process_name)).empty:
|
|
||||||
process_name = 'mysqld_safe'
|
|
||||||
# redis not deployed on osp17 by default, only if some
|
|
||||||
# other services such as designate and octavia are deployed
|
|
||||||
if (process_name == 'redis-server' and
|
|
||||||
not overcloud.is_redis_expected()):
|
|
||||||
redis_message = ("redis-server not expected on OSP 17 "
|
|
||||||
"and later releases by default")
|
|
||||||
if self.oc_procs_df.query(
|
|
||||||
f'PROCESS=="{process_name}"').empty:
|
|
||||||
LOG.info(redis_message)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
raise OvercloudProcessesException(
|
|
||||||
process_error=redis_message)
|
|
||||||
|
|
||||||
if not self.oc_procs_df.query('PROCESS=="{}"'.format(
|
|
||||||
process_name)).empty:
|
|
||||||
LOG.info("overcloud processes status checks: "
|
|
||||||
"process {} is "
|
|
||||||
"in running state".format(process_name))
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
LOG.info("Failure : overcloud processes status checks:"
|
|
||||||
"process {} is not running ".format(
|
|
||||||
process_name))
|
|
||||||
raise OvercloudProcessesException(
|
|
||||||
process_error="process {} is not running ".format(
|
|
||||||
process_name))
|
|
||||||
# if all procs are running we can return true
|
|
||||||
return True
|
|
||||||
except OvercloudProcessesException:
|
except OvercloudProcessesException:
|
||||||
LOG.info('Retrying overcloud processes checks attempt '
|
if attempt.is_last:
|
||||||
'{} of 360'.format(attempt_number))
|
LOG.error('Not all overcloud processes are running')
|
||||||
time.sleep(1)
|
raise
|
||||||
|
LOG.info('Retrying overcloud processes: %s', attempt.details)
|
||||||
self.oc_procs_df = overcloud.get_overcloud_nodes_dataframe(
|
self.oc_procs_df = overcloud.get_overcloud_nodes_dataframe(
|
||||||
get_overcloud_node_processes_table)
|
get_overcloud_node_processes_table)
|
||||||
# exhausted all retries
|
|
||||||
tobiko.fail('Not all overcloud processes are running !\n')
|
# if all procs are running we can return true
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _ovn_overcloud_process_validations(self, process_dict):
|
||||||
|
if not self.oc_procs_df.query('PROCESS=="{}"'.format(
|
||||||
|
process_dict['name'])).empty:
|
||||||
|
LOG.info("overcloud processes status checks: "
|
||||||
|
f"process {process_dict['name']} is "
|
||||||
|
"in running state")
|
||||||
|
|
||||||
|
ovn_proc_filtered_df = self.oc_procs_df.query(
|
||||||
|
'PROCESS=="{}"'.format(process_dict['name']))
|
||||||
|
|
||||||
|
if (process_dict['node_group'] not in
|
||||||
|
topology.list_openstack_node_groups()):
|
||||||
|
LOG.debug(
|
||||||
|
f"{process_dict['node_group']} is not "
|
||||||
|
"a node group part of this Openstack cloud")
|
||||||
|
return
|
||||||
|
node_list = [node.name
|
||||||
|
for node in
|
||||||
|
topology.list_openstack_nodes(
|
||||||
|
group=process_dict['node_group'])]
|
||||||
|
node_names_re = re.compile(r'|'.join(node_list))
|
||||||
|
node_filter = (ovn_proc_filtered_df.overcloud_node.
|
||||||
|
str.match(node_names_re))
|
||||||
|
# get the processes running on a specific type of nodes
|
||||||
|
ovn_proc_filtered_per_node_df = \
|
||||||
|
ovn_proc_filtered_df[node_filter]
|
||||||
|
total_num_processes = len(ovn_proc_filtered_per_node_df)
|
||||||
|
|
||||||
|
if type(process_dict['number']) == int:
|
||||||
|
expected_num_processes = process_dict['number']
|
||||||
|
elif process_dict['number'] == 'all':
|
||||||
|
expected_num_processes = len(node_list)
|
||||||
|
else:
|
||||||
|
raise ValueError("Unexpected value:"
|
||||||
|
f"{process_dict['number']}")
|
||||||
|
|
||||||
|
if expected_num_processes != total_num_processes:
|
||||||
|
raise OvercloudProcessesException(
|
||||||
|
"Unexpected number"
|
||||||
|
f" of processes {process_dict['name']} running on "
|
||||||
|
f"{process_dict['node_group']} nodes")
|
||||||
|
# process successfully validated
|
||||||
|
LOG.debug(f"{process_dict['name']} successfully validated on "
|
||||||
|
f"{process_dict['node_group']} nodes")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def ovn_overcloud_processes_validations(self):
|
def ovn_overcloud_processes_validations(self):
|
||||||
|
@ -193,49 +238,18 @@ class OvercloudProcessesStatus(object):
|
||||||
LOG.info("Networking OVN not configured")
|
LOG.info("Networking OVN not configured")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
for process_dict in self.ovn_processes_to_check_per_node:
|
for attempt in tobiko.retry(timeout=300., interval=1.):
|
||||||
if not self.oc_procs_df.query('PROCESS=="{}"'.format(
|
try:
|
||||||
process_dict['name'])).empty:
|
for process_dict in self.ovn_processes_to_check_per_node:
|
||||||
LOG.info("overcloud processes status checks: "
|
self._ovn_overcloud_process_validations(process_dict)
|
||||||
f"process {process_dict['name']} is "
|
except OvercloudProcessesException:
|
||||||
"in running state")
|
if attempt.is_last:
|
||||||
|
LOG.error('Unexpected number of OVN overcloud processes')
|
||||||
|
raise
|
||||||
|
LOG.info('Retrying OVN overcloud processes: %s',
|
||||||
|
attempt.details)
|
||||||
|
self.oc_procs_df = overcloud.get_overcloud_nodes_dataframe(
|
||||||
|
get_overcloud_node_processes_table)
|
||||||
|
|
||||||
ovn_proc_filtered_df = self.oc_procs_df.query(
|
# if all procs are running we can return true
|
||||||
'PROCESS=="{}"'.format(process_dict['name']))
|
return True
|
||||||
|
|
||||||
if (process_dict['node_group'] not in
|
|
||||||
topology.list_openstack_node_groups()):
|
|
||||||
LOG.debug(f"{process_dict['node_group']} is not "
|
|
||||||
"a node group part of this Openstack cloud")
|
|
||||||
continue
|
|
||||||
node_list = [node.name
|
|
||||||
for node in
|
|
||||||
topology.list_openstack_nodes(
|
|
||||||
group=process_dict['node_group'])]
|
|
||||||
node_names_re = re.compile(r'|'.join(node_list))
|
|
||||||
node_filter = (ovn_proc_filtered_df.overcloud_node.
|
|
||||||
str.match(node_names_re))
|
|
||||||
# obtain the processes running on a specific type of nodes
|
|
||||||
ovn_proc_filtered_per_node_df = \
|
|
||||||
ovn_proc_filtered_df[node_filter]
|
|
||||||
if type(process_dict['number']) == int:
|
|
||||||
assert process_dict['number'] == \
|
|
||||||
len(ovn_proc_filtered_per_node_df), (
|
|
||||||
"Unexpected number"
|
|
||||||
f" of processes {process_dict['name']} running on "
|
|
||||||
f"{process_dict['node_group']} nodes")
|
|
||||||
elif process_dict['number'] == 'all':
|
|
||||||
num_nodes = len(node_list)
|
|
||||||
assert num_nodes == len(ovn_proc_filtered_per_node_df), (
|
|
||||||
"Unexpected number of processes "
|
|
||||||
f"{process_dict['name']} running on "
|
|
||||||
f"{process_dict['node_group']} nodes")
|
|
||||||
else:
|
|
||||||
raise RuntimeError("Unexpected value:"
|
|
||||||
f"{process_dict['node_group']}")
|
|
||||||
# process successfully validated
|
|
||||||
LOG.debug(f"{process_dict['name']} successfully validated on "
|
|
||||||
f"{process_dict['node_group']} nodes")
|
|
||||||
|
|
||||||
# if all procs are running we can return true
|
|
||||||
return True
|
|
||||||
|
|
Loading…
Reference in New Issue