Merge "add backround ping during disruptive actions"

This commit is contained in:
Zuul 2022-01-19 13:20:36 +00:00 committed by Gerrit Code Review
commit b565e89f22
7 changed files with 228 additions and 0 deletions

View File

@ -62,3 +62,5 @@ RECEIVED = _ping.RECEIVED
UNRECEIVED = _ping.UNRECEIVED
PingStatistics = _statistics.PingStatistics
write_ping_to_file = _ping.write_ping_to_file
check_ping_statistics = _ping.check_ping_statistics

View File

@ -15,9 +15,14 @@
# under the License.
from __future__ import absolute_import
import glob
import json
import io
import os
import time
import typing
import netaddr
from oslo_log import log
@ -415,3 +420,77 @@ def handle_ping_unknow_host_error(text):
if text.endswith(suffix):
details = text[:-len(suffix)].strip().split()[-1]
raise _exception.UnknowHostError(details=details)
def ping_to_json(ping_result: _statistics.PingStatistics) -> str:
    """Serialize one ping statistics sample as a JSON object string.

    The object holds the destination (converted to ``str``), the
    ``transmitted`` and ``received`` values of the sample and a
    ``timestamp`` built by formatting ``begin_interval`` with
    ``time.ctime``.
    """
    return json.dumps({
        "destination": str(ping_result.destination),
        "transmitted": ping_result.transmitted,
        "received": ping_result.received,
        "timestamp": time.ctime(ping_result.begin_interval),
    })
def write_ping_to_file(ping_ip=None, output_dir='tobiko_ping_results'):
    """Ping a host and append every statistics sample to a log file.

    Samples come from ``iter_statistics`` and are written, one JSON object
    per line (see ``ping_to_json``), to
    ``<user home>/<output_dir>/ping_<ping_ip>.log``.  The function only
    returns once ``iter_statistics`` stops yielding results.

    :param ping_ip: host to ping; also used to build the log file name
    :param output_dir: results directory, relative to the user's home
    """
    output_dir_path = f'{sh.get_user_home_dir()}/{output_dir}'
    # exist_ok avoids the check-then-create race when several ping
    # processes are started at the same time
    os.makedirs(output_dir_path, exist_ok=True)
    output_filename = f'ping_{ping_ip}.log'
    output_path = os.path.join(output_dir_path, output_filename)
    LOG.info(f'starting ping process to > {ping_ip} , '
             f'output file is : {output_path}')
    ping_result_statistics = iter_statistics(parameters=None,
                                             host=ping_ip, until=None,
                                             timeout=99999,
                                             check=True)
    for ping_result in ping_result_statistics:
        # the file is reopened (append mode) and closed for every sample so
        # the line reaches the disk before sleeping: this process may be
        # killed at any time and buffered data would otherwise be lost
        with open(output_path, "at") as ping_result_file:
            ping_result_file.write(ping_to_json(ping_result) + "\n")
        time.sleep(5)
def get_vm_ping_log_files(glob_ping_log_pattern='tobiko_ping_results/ping_'
                                                '*.log'):
    """Yield the files under the user's home matching the glob pattern.

    Every matching path is logged right before it is yielded.
    """
    home_dir = sh.get_user_home_dir()
    for ping_log_path in glob.glob(f'{home_dir}/{glob_ping_log_pattern}'):
        LOG.info(f'found following ping_vm_log files {ping_log_path}')
        yield ping_log_path
def rename_ping_staistics_file_to_checked(filepath):
    """Mark a ping statistics file as processed.

    The file is renamed in place by appending ``_checked`` to its path.
    """
    checked_path = '{}_checked'.format(filepath)
    os.rename(filepath, checked_path)
def check_ping_statistics(failure_limit=10):
    """Check every pending ping log file for too many failed pings.

    Each file yielded by ``get_vm_ping_log_files`` is read as one JSON
    object per line.  A line counts as a failure when its ``transmitted``
    and ``received`` values differ.  Once a file has been inspected it is
    renamed with a ``_checked`` suffix, and ``tobiko.fail`` is called as
    soon as a file reaches ``failure_limit`` failures.

    :param failure_limit: failed ping lines per file that trigger a failure
    """
    # snapshot the file list first: files get renamed while we iterate
    for filename in list(get_vm_ping_log_files()):
        LOG.info(f'checking ping log file: {filename}, '
                 f'failure_limit is :{failure_limit}')
        failure_counter = 0
        limit_reached = False
        failed_destination = None
        with io.open(filename, 'rt') as fd:
            for raw_line in fd:
                raw_line = raw_line.strip()
                if not raw_line:
                    # tolerate blank lines instead of crashing json.loads
                    continue
                ping_record = json.loads(raw_line)
                if ping_record['transmitted'] != ping_record['received']:
                    failure_counter += 1
                    LOG.debug(f'found ping failure to :'
                              f' {ping_record["destination"]}')
                    if failure_counter >= failure_limit:
                        limit_reached = True
                        failed_destination = ping_record["destination"]
                        break
        # rename only after the file handle has been closed
        rename_ping_staistics_file_to_checked(filename)
        if limit_reached:
            tobiko.fail(f'{failure_counter} pings failure found '
                        f'to vm fip destination: '
                        f'{failed_destination}')
        # reached when the file stayed below failure_limit failures
        LOG.info(f'no failures in ping log file: {filename}')

View File

@ -24,6 +24,7 @@ from tobiko.shell.sh import _io
from tobiko.shell.sh import _local
from tobiko.shell.sh import _mktemp
from tobiko.shell.sh import _nameservers
from tobiko.shell.sh import _path
from tobiko.shell.sh import _process
from tobiko.shell.sh import _ps
from tobiko.shell.sh import _reboot
@ -71,6 +72,10 @@ ListNameserversFixture = _nameservers.ListNameserversFixture
list_nameservers = _nameservers.list_nameservers
process = _process.process
start_background_process = _process.start_background_process
check_or_start_background_process =\
_process.check_or_start_background_process
get_user_home_dir = _path.get_user_home_dir
str_from_stream = _process.str_from_stream
ShellProcessFixture = _process.ShellProcessFixture

View File

@ -16,6 +16,7 @@
from __future__ import absolute_import
import typing # noqa
from os.path import expanduser
from oslo_log import log
@ -25,6 +26,11 @@ import tobiko
LOG = log.getLogger(__name__)
def get_user_home_dir():
    """Return the current user's home directory as a string path."""
    home_dir = expanduser("~")
    return home_dir
class ExecutePathFixture(tobiko.SharedFixture):
def __init__(self, executable_dirs=None, environ=None):

View File

@ -16,11 +16,17 @@
from __future__ import absolute_import
import io
import os
import time
import typing # noqa
from multiprocessing import Process as MultiProcess
import psutil
from oslo_log import log
import tobiko
from tobiko.shell import sh
from tobiko.shell.sh import _command
from tobiko.shell.sh import _exception
from tobiko.shell.sh import _io
@ -457,3 +463,104 @@ def default_sudo_command():
def network_namespace_command(network_namespace, command):
    """Prefix ``command`` with ``/sbin/ip netns exec <network_namespace>``.

    The prefix is built with ``_command.shell_command`` and ``command`` is
    appended to it with ``+``.
    """
    netns_prefix = _command.shell_command(
        ['/sbin/ip', 'netns', 'exec', network_namespace])
    return netns_prefix + command
def start_background_process(bg_function=None, bg_process_name=None, **kwargs):
    """Run a function in a separate, non attached background process.

    The function is started from a short-lived intermediate (parent) process
    which is terminated about one second later; the worker process is thereby
    orphaned and is meant to keep running even after Tobiko exits.

    params:
    bg_function= function to run in background
    bg_process_name= name given to the worker process; it is also used to
    build the pids file path: <user home>/<bg_process_name>_pids_file.
    Multiple processes can use the same pids file, pids are appended.
    kwargs= keyword arguments passed to bg_function
    outputs: writes the worker process pid to the pids file, one per line
    returns: None"""

    # define a parent process that would be killed and orphan the actual
    # background process to run unattached in the background
    # this is so the background process won't be stopped when tobiko exits
    def _background_process_parent():
        p = MultiProcess(target=bg_function, name=bg_process_name,
                         kwargs=kwargs)
        p.start()
        LOG.info(
            f'Started background function: {bg_function.__name__} process pid '
            f'is: {p.pid}, process name: {bg_process_name}, '
            f'main execution process continues...')
        # append bg_process pid to a file
        bg_process_pids_file_name = f'{sh.get_user_home_dir()}/' \
                                    f'{bg_process_name}_pids_file'
        with open(bg_process_pids_file_name, "at") as bg_process_pid_file:
            bg_process_pid_file.write(str(p.pid) + "\n")
            LOG.debug(f'Writing pid: {p.pid} to pids file:'
                      f' {bg_process_pids_file_name}')

    # start parent process, nested with a started child process
    # then kill the parent
    d = MultiProcess(target=_background_process_parent)
    d.daemon = False
    d.start()
    LOG.debug(f'Background process parent started pid: {d.pid}')
    # NOTE(review): the parent gets one second to start the worker and to
    # record its pid before being terminated - presumably enough in practice,
    # confirm on slow hosts
    time.sleep(1)
    d.terminate()
    LOG.debug(f'Background process orphaned, parent killed parent pid:'
              f' {d.pid}')
def stop_process(pid_list):
    """Force-kill every process whose pid is listed in ``pid_list``.

    Each pid is logged, then ``sudo kill -9 <pid>`` is run through
    ``sh.execute``.
    """
    for target_pid in pid_list:
        LOG.info(f'stopping process with pid: {target_pid}')
        kill_command = f'sudo kill -9 {target_pid}'
        sh.execute(kill_command)
def get_bg_procs_pids(bg_process_name):
    """Return the recorded pids of ``bg_process_name`` that are still alive.

    Pids are read from ``<user home>/<bg_process_name>_pids_file``, one per
    line.  Lines that are not valid integers, pids without a matching
    process and zombie processes are skipped.

    :param bg_process_name: name used to build the pids file path
    :returns: list of pid strings; empty when the pids file does not exist
    """
    bg_process_pids_file_name = f'{sh.get_user_home_dir()}/' \
                                f'{bg_process_name}_pids_file'
    bg_process_name_pid_list = []
    if not os.path.isfile(bg_process_pids_file_name):
        return bg_process_name_pid_list

    LOG.info(f'found previous background process file :'
             f' {bg_process_pids_file_name}, cheking it`s processes.')
    # go over file's pids
    with io.open(bg_process_pids_file_name, 'rt') as fd:
        for line in fd:
            pid = line.rstrip()
            try:
                # status() is queried inside the try block as well: the
                # process may disappear between the lookup and the query
                proc_status = psutil.Process(int(pid)).status()
            except (TypeError, ValueError, psutil.NoSuchProcess):
                # pid is not a valid int or the process doesn't exist
                continue
            if proc_status == psutil.STATUS_ZOMBIE:
                LOG.debug(f'skipping process {pid} , it\'s a zombie')
                continue
            bg_process_name_pid_list.append(pid)
    return bg_process_name_pid_list
def check_or_start_background_process(bg_function=None,
                                      bg_process_name=None,
                                      check_function=None, **kwargs):
    """Either check a previous background run or start a new one.

    When pids are found for ``bg_process_name`` those processes are stopped
    and ``check_function`` is called (e.g. to go over their results file).
    Otherwise ``bg_function`` is started as a new background process.

    params:
    bg_process_name= process name
    bg_function: function to run in background
    check_function: function called after stopping the found processes
    kwargs: keyword arguments forwarded to start_background_process
    """
    running_pids = get_bg_procs_pids(bg_process_name)
    if not running_pids:
        # nothing is running: start a new background process
        LOG.info(f'No previous background processes found:'
                 f' {bg_process_name}, starting a new background process '
                 f'of function: {bg_function}')
        start_background_process(bg_function=bg_function,
                                 bg_process_name=bg_process_name, **kwargs)
        return

    stop_process(running_pids)
    # execute process check i.e. go over process results file
    LOG.info(f'running a check function: {check_function} '
             f'on results of processes: {bg_process_name}')
    check_function()

View File

@ -124,6 +124,9 @@ class DisruptTripleoNodesTest(testtools.TestCase):
def test_0vercloud_health_check(self):
    # Runs the OvercloudHealthCheck "before" checks with
    # skip_mac_table_size_test explicitly set to False.
    # NOTE(review): the leading zero in the test name presumably makes it
    # sort (and run) first - confirm.
    OvercloudHealthCheck.run_before(skip_mac_table_size_test=False)
def test_check_background_vm_ping(self):
    # Delegates to nova.check_or_start_background_vm_ping(); the actual
    # behaviour is defined by that helper in the `nova` module imported by
    # this test file.
    nova.check_or_start_background_vm_ping()
def test_hard_reboot_controllers_recovery(self):
OvercloudHealthCheck.run_before()
cloud_disruptions.reset_all_controller_nodes()

View File

@ -7,11 +7,13 @@ from oslo_log import log
import pandas
import tobiko
from tobiko import tripleo
from tobiko.tripleo import overcloud
from tobiko.shell import ping
from tobiko.shell import sh
from tobiko.openstack import nova
from tobiko.openstack import topology
from tobiko.openstack import stacks
from tobiko.tripleo import containers
@ -123,6 +125,7 @@ def check_ping_vm_fip(fip):
def check_df_vms_ping(df):
    """Ping the floating IP of every VM listed in ``df``.

    ``df`` is expected to expose a ``vm_id`` column; for each of its values
    the floating IP is looked up with ``vm_floating_ip`` and then checked
    with ``check_ping_vm_fip``.
    """
    for server_id in df.vm_id.to_list():
        floating_ip = vm_floating_ip(server_id)
        check_ping_vm_fip(floating_ip)
@ -218,3 +221,26 @@ def check_computes_vms_running_via_virsh():
else:
LOG.info(f"{vm_id} is not in running state on "
f"{compute.hostname}")
def get_nova_server_floating_ip():
    """Return the floating IP address of the Cirros server stack fixture.

    The fixture is set up through ``tobiko.setup_fixture`` before its
    ``floating_ip_address`` is read.
    """
    cirros_stack = tobiko.setup_fixture(stacks.CirrosServerStackFixture)
    return cirros_stack.floating_ip_address
# Test is intended for D/S env
@tripleo.skip_if_missing_overcloud
def check_or_start_background_vm_ping():
    """Check a previous background VM ping, or start a new one.

    The floating IP of a nova server is passed, together with the ping
    writer and the ping statistics checker, to
    ``sh.check_or_start_background_process`` under the process name
    ``tobiko_background_ping``.  This is intended to be run once to start
    the background ping and again, by a later tobiko run, to stop it and
    check the recorded ping results.
    """
    floating_ip = get_nova_server_floating_ip()
    sh.check_or_start_background_process(
        ping_ip=floating_ip,
        bg_process_name='tobiko_background_ping',
        bg_function=ping.write_ping_to_file,
        check_function=ping.check_ping_statistics)