Merge "add background ping during disruptive actions"
This commit is contained in:
commit
b565e89f22
@ -62,3 +62,5 @@ RECEIVED = _ping.RECEIVED
|
||||
UNRECEIVED = _ping.UNRECEIVED
|
||||
|
||||
PingStatistics = _statistics.PingStatistics
|
||||
write_ping_to_file = _ping.write_ping_to_file
|
||||
check_ping_statistics = _ping.check_ping_statistics
|
||||
|
@ -15,9 +15,14 @@
|
||||
# under the License.
|
||||
from __future__ import absolute_import
|
||||
|
||||
import glob
|
||||
import json
|
||||
import io
|
||||
import os
|
||||
import time
|
||||
import typing
|
||||
|
||||
|
||||
import netaddr
|
||||
from oslo_log import log
|
||||
|
||||
@ -415,3 +420,77 @@ def handle_ping_unknow_host_error(text):
|
||||
if text.endswith(suffix):
|
||||
details = text[:-len(suffix)].strip().split()[-1]
|
||||
raise _exception.UnknowHostError(details=details)
|
||||
|
||||
|
||||
def ping_to_json(ping_result: _statistics.PingStatistics) -> str:
    """Serialize one ping statistics sample as a JSON object string.

    The resulting object holds the destination (converted to ``str``), the
    ``transmitted`` and ``received`` values of the sample, and a
    ``timestamp`` built by passing ``begin_interval`` to ``time.ctime``.
    """
    return json.dumps({
        "destination": str(ping_result.destination),
        "transmitted": ping_result.transmitted,
        "received": ping_result.received,
        "timestamp": time.ctime(ping_result.begin_interval),
    })
|
||||
|
||||
|
||||
def write_ping_to_file(ping_ip=None, output_dir='tobiko_ping_results'):
    """Ping a host and append every statistics sample to a log file.

    params:
    ping_ip= address of the host to ping; it is also used to build the
    output file name ``ping_<ping_ip>.log``
    output_dir= directory, relative to the current user's home, where the
    log file is written; it is created when missing

    outputs: one JSON line (see ``ping_to_json``) per sample produced by
    ``iter_statistics``, appended to the log file
    """
    output_dir_path = f'{sh.get_user_home_dir()}/{output_dir}'
    # exist_ok removes the check-then-create race: another ping process may
    # create the directory between an existence test and makedirs().
    os.makedirs(output_dir_path, exist_ok=True)
    output_filename = f'ping_{ping_ip}.log'
    output_path = os.path.join(output_dir_path, output_filename)
    LOG.info(f'starting ping process to > {ping_ip} , '
             f'output file is : {output_path}')
    ping_result_statistics = iter_statistics(parameters=None,
                                             host=ping_ip, until=None,
                                             timeout=99999,
                                             check=True)
    for ping_result in ping_result_statistics:
        # The file is re-opened for every sample so that each line is
        # written out and the file is closed before the pause below.
        with open(output_path, "at") as ping_result_file:
            ping_result_file.write(ping_to_json(ping_result) + "\n")
        time.sleep(5)
|
||||
|
||||
|
||||
def get_vm_ping_log_files(glob_ping_log_pattern='tobiko_ping_results/ping_'
                                                '*.log'):
    """Yield the paths of the ping log files matching the pattern.

    The glob pattern is resolved relative to the current user's home
    directory; every matching path is logged before being yielded.
    """
    home_pattern = f'{sh.get_user_home_dir()}/{glob_ping_log_pattern}'
    for log_path in glob.glob(home_pattern):
        LOG.info(f'found following ping_vm_log files {log_path}')
        yield log_path
|
||||
|
||||
|
||||
def rename_ping_staistics_file_to_checked(filepath):
    """Rename a ping statistics file by appending ``_checked`` to its path.

    Called once a file has been checked; the renamed file ends in
    ``_checked``.
    """
    checked_path = '{}_checked'.format(filepath)
    os.rename(filepath, checked_path)
|
||||
|
||||
|
||||
def check_ping_statistics(failure_limit=10):
    """Check every pending ping log file against a failure limit.

    Each file returned by ``get_vm_ping_log_files`` holds one JSON object
    per line. A line counts as a failure when its ``transmitted`` and
    ``received`` values differ. Every inspected file is renamed with
    ``rename_ping_staistics_file_to_checked``.

    params:
    failure_limit= number of failed lines in a single file at which the
    check fails

    raises: whatever ``tobiko.fail`` raises, as soon as one file reaches
    ``failure_limit`` failures
    """
    # Snapshot the file list up front: files are renamed inside the loop.
    for filename in list(get_vm_ping_log_files()):
        LOG.info(f'checking ping log file: {filename}, '
                 f'failure_limit is :{failure_limit}')
        failure_counter = 0
        limit_reached = False
        failed_destination = None
        with io.open(filename, 'rt') as fd:
            for raw_line in fd:
                raw_line = raw_line.strip()
                if not raw_line:
                    # a blank line carries no sample and is not valid JSON
                    continue
                ping_line = json.loads(raw_line)
                if ping_line['transmitted'] != ping_line['received']:
                    failure_counter += 1
                    LOG.debug(f'found ping failure to :'
                              f' {ping_line["destination"]}')
                    if failure_counter >= failure_limit:
                        limit_reached = True
                        failed_destination = ping_line["destination"]
                        break

        # Rename only after the file has been closed, and before failing,
        # so a failed file is still marked as checked.
        rename_ping_staistics_file_to_checked(filename)

        if limit_reached:
            tobiko.fail(f'{failure_counter} pings failure found '
                        f'to vm fip destination: '
                        f'{failed_destination}')
        elif failure_counter:
            # do not claim "no failures" when some were counted
            LOG.info(f'{failure_counter} ping failures (below limit '
                     f'{failure_limit}) in ping log file: {filename}')
        else:
            LOG.info(f'no failures in ping log file: {filename}')
|
||||
|
@ -24,6 +24,7 @@ from tobiko.shell.sh import _io
|
||||
from tobiko.shell.sh import _local
|
||||
from tobiko.shell.sh import _mktemp
|
||||
from tobiko.shell.sh import _nameservers
|
||||
from tobiko.shell.sh import _path
|
||||
from tobiko.shell.sh import _process
|
||||
from tobiko.shell.sh import _ps
|
||||
from tobiko.shell.sh import _reboot
|
||||
@ -71,6 +72,10 @@ ListNameserversFixture = _nameservers.ListNameserversFixture
|
||||
list_nameservers = _nameservers.list_nameservers
|
||||
|
||||
process = _process.process
|
||||
start_background_process = _process.start_background_process
|
||||
check_or_start_background_process =\
|
||||
_process.check_or_start_background_process
|
||||
get_user_home_dir = _path.get_user_home_dir
|
||||
str_from_stream = _process.str_from_stream
|
||||
ShellProcessFixture = _process.ShellProcessFixture
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import typing # noqa
|
||||
from os.path import expanduser
|
||||
|
||||
from oslo_log import log
|
||||
|
||||
@ -25,6 +26,11 @@ import tobiko
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
def get_user_home_dir():
    """Return the current user's home directory as a string path."""
    home_dir = expanduser("~")
    return home_dir
|
||||
|
||||
|
||||
class ExecutePathFixture(tobiko.SharedFixture):
|
||||
|
||||
def __init__(self, executable_dirs=None, environ=None):
|
||||
|
@ -16,11 +16,17 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
import io
|
||||
import os
|
||||
import time
|
||||
import typing # noqa
|
||||
from multiprocessing import Process as MultiProcess
|
||||
|
||||
import psutil
|
||||
from oslo_log import log
|
||||
|
||||
|
||||
import tobiko
|
||||
from tobiko.shell import sh
|
||||
from tobiko.shell.sh import _command
|
||||
from tobiko.shell.sh import _exception
|
||||
from tobiko.shell.sh import _io
|
||||
@ -457,3 +463,104 @@ def default_sudo_command():
|
||||
def network_namespace_command(network_namespace, command):
    """Prefix ``command`` with ``/sbin/ip netns exec <network_namespace>``.

    The prefix is built with ``_command.shell_command`` and ``command`` is
    added to it with ``+``.
    """
    netns_prefix = _command.shell_command(
        ['/sbin/ip', 'netns', 'exec', network_namespace])
    return netns_prefix + command
|
||||
|
||||
|
||||
def start_background_process(bg_function=None, bg_process_name=None, **kwargs):
    """Run a function in a separate, non attached background process.

    An intermediate parent process is started; it starts the actual worker
    process running ``bg_function`` and appends the worker pid to the file
    ``<user home>/<bg_process_name>_pids_file``. The parent is then
    terminated, with the stated intent that the orphaned worker keeps
    running even after Tobiko exits.

    params:
    bg_function= function to run in the background
    bg_process_name= name given to the worker process; it is also used to
    build the pids file name, so several processes started with the same
    name append their pids to the same file
    kwargs= keyword arguments passed to bg_function

    outputs: writes processes pids to a file, each in a line
    returns: None
    """

    # define a parent process that would be killed and orphan the actual
    # background process to run unattached in the background
    # this is so the background process won't be stopped when tobiko exits
    def _background_process_parent():
        p = MultiProcess(target=bg_function, name=bg_process_name,
                         kwargs=kwargs)
        p.start()
        LOG.info(
            f'Started background function: {bg_function.__name__} process pid '
            f'is: {p.pid}, process name: {bg_process_name}, '
            f'main execution process continues...')
        # append bg_process pid to a file
        bg_process_pids_file_name = f'{sh.get_user_home_dir()}/' \
                                    f'{bg_process_name}_pids_file'
        with open(bg_process_pids_file_name, "at") as bg_process_pid_file:
            bg_process_pid_file.write(str(p.pid) + "\n")
            LOG.debug(f'Writing pid: {p.pid} to pids file:'
                      f' {bg_process_pids_file_name}')

    # start parent process, nested with a started child process
    # then kill the parent
    d = MultiProcess(target=_background_process_parent)
    d.daemon = False
    d.start()
    LOG.debug(f'Background process parent started pid: {d.pid}')
    # NOTE(review): the fixed one second pause is presumably there to let the
    # parent start the worker and record its pid before being terminated;
    # nothing here verifies that it did - confirm this is long enough.
    time.sleep(1)
    d.terminate()
    LOG.debug(f'Background process orphaned, parent killed parent pid:'
              f' {d.pid}')
|
||||
|
||||
|
||||
def stop_process(pid_list):
    """Kill every process whose pid is listed in ``pid_list``.

    Each pid is logged and then killed by running ``sudo kill -9 <pid>``
    through ``sh.execute``.
    """
    for target_pid in pid_list:
        LOG.info(f'stopping process with pid: {target_pid}')
        sh.execute(f'sudo kill -9 {target_pid}')
|
||||
|
||||
|
||||
def get_bg_procs_pids(bg_process_name):
    """Return the recorded pids of ``bg_process_name`` that are still running.

    Pids are read, one per line, from the file
    ``<user home>/<bg_process_name>_pids_file``. Lines that are not a valid
    integer, pids with no matching process and zombie processes are left
    out.

    params:
    bg_process_name= process name used to build the pids file name

    returns: list of pids, as the strings read from the file; empty when
    the pids file does not exist
    """
    bg_process_pids_file_name = f'{sh.get_user_home_dir()}/' \
                                f'{bg_process_name}_pids_file'
    bg_process_name_pid_list = []
    if not os.path.isfile(bg_process_pids_file_name):
        return bg_process_name_pid_list

    LOG.info(f'found previous background process file :'
             f' {bg_process_pids_file_name}, cheking it`s processes.')
    # go over file's pids
    with io.open(bg_process_pids_file_name, 'rt') as fd:
        for line in fd:
            pid = line.rstrip()
            try:
                proc = psutil.Process(int(pid))
                # status() is queried inside the try block: the process
                # may disappear between the lookup and the status call
                status = proc.status()
            # continue if pid is not a valid int or doesn't exist
            except (TypeError, ValueError, psutil.NoSuchProcess):
                continue
            # a zombie is not running anymore: report it and leave it out
            if status == psutil.STATUS_ZOMBIE:
                LOG.debug(f'skipping process {pid} , it\'s a zombie')
                continue
            bg_process_name_pid_list.append(pid)
    return bg_process_name_pid_list
|
||||
|
||||
|
||||
def check_or_start_background_process(bg_function=None,
                                      bg_process_name=None,
                                      check_function=None, **kwargs):
    """Check a previous background process, or start a new one.

    When pids are found for ``bg_process_name`` those processes are stopped
    and ``check_function`` is called with no arguments. Otherwise
    ``bg_function`` is started as a new background process.

    params:
    bg_function: function to run in the background
    bg_process_name= process name
    check_function: function called after the processes were stopped
    kwargs= keyword arguments forwarded to start_background_process
    """
    running_pids = get_bg_procs_pids(bg_process_name)

    if not running_pids:
        # nothing from a previous run: start a fresh background process
        LOG.info(f'No previous background processes found:'
                 f' {bg_process_name}, starting a new background process '
                 f'of function: {bg_function}')
        start_background_process(bg_function=bg_function,
                                 bg_process_name=bg_process_name, **kwargs)
        return

    stop_process(running_pids)
    # execute process check i.e. go over process results file
    LOG.info(f'running a check function: {check_function} '
             f'on results of processes: {bg_process_name}')
    check_function()
|
||||
|
@ -124,6 +124,9 @@ class DisruptTripleoNodesTest(testtools.TestCase):
|
||||
def test_0vercloud_health_check(self):
|
||||
OvercloudHealthCheck.run_before(skip_mac_table_size_test=False)
|
||||
|
||||
def test_check_background_vm_ping(self):
    """Run the background VM ping check-or-start step."""
    nova.check_or_start_background_vm_ping()
|
||||
|
||||
def test_hard_reboot_controllers_recovery(self):
|
||||
OvercloudHealthCheck.run_before()
|
||||
cloud_disruptions.reset_all_controller_nodes()
|
||||
|
@ -7,11 +7,13 @@ from oslo_log import log
|
||||
import pandas
|
||||
|
||||
import tobiko
|
||||
from tobiko import tripleo
|
||||
from tobiko.tripleo import overcloud
|
||||
from tobiko.shell import ping
|
||||
from tobiko.shell import sh
|
||||
from tobiko.openstack import nova
|
||||
from tobiko.openstack import topology
|
||||
from tobiko.openstack import stacks
|
||||
from tobiko.tripleo import containers
|
||||
|
||||
|
||||
@ -123,6 +125,7 @@ def check_ping_vm_fip(fip):
|
||||
def check_df_vms_ping(df):
|
||||
"""input: dataframe with vms_ids
|
||||
try to ping all vms in df"""
|
||||
|
||||
for vm_id in df.vm_id.to_list():
|
||||
check_ping_vm_fip(vm_floating_ip(vm_id))
|
||||
|
||||
@ -218,3 +221,26 @@ def check_computes_vms_running_via_virsh():
|
||||
else:
|
||||
LOG.info(f"{vm_id} is not in running state on "
|
||||
f"{compute.hostname}")
|
||||
|
||||
|
||||
def get_nova_server_floating_ip():
    """Return the floating IP address of the Cirros server stack.

    The ``CirrosServerStackFixture`` is set up through
    ``tobiko.setup_fixture`` and its ``floating_ip_address`` is returned.
    """
    server_stack = tobiko.setup_fixture(stacks.CirrosServerStackFixture)
    return server_stack.floating_ip_address
|
||||
|
||||
|
||||
# Test is intended for D/S env
@tripleo.skip_if_missing_overcloud
def check_or_start_background_vm_ping():
    """Check a previous background VM ping, or start a new one.

    If a background ping process already exists it is stopped and its
    results are checked with ``ping.check_ping_statistics``; otherwise a
    new separate process is started that pings the VM floating IP with
    ``ping.write_ping_to_file``.

    This is intended to be run once and picked up again by the next
    tobiko run: ping results are parsed and a failure is raised if the
    number of ping failures is above a certain amount.
    """
    target_fip = get_nova_server_floating_ip()
    sh.check_or_start_background_process(
        bg_process_name='tobiko_background_ping',
        bg_function=ping.write_ping_to_file,
        check_function=ping.check_ping_statistics,
        ping_ip=target_fip)
|
||||
|
Loading…
x
Reference in New Issue
Block a user