diff --git a/tobiko/shell/ping/__init__.py b/tobiko/shell/ping/__init__.py
index 72fc1e815..156069b3a 100644
--- a/tobiko/shell/ping/__init__.py
+++ b/tobiko/shell/ping/__init__.py
@@ -62,3 +62,5 @@
 RECEIVED = _ping.RECEIVED
 UNRECEIVED = _ping.UNRECEIVED
 PingStatistics = _statistics.PingStatistics
+write_ping_to_file = _ping.write_ping_to_file
+check_ping_statistics = _ping.check_ping_statistics
diff --git a/tobiko/shell/ping/_ping.py b/tobiko/shell/ping/_ping.py
index d002e5e40..0ee9537b0 100644
--- a/tobiko/shell/ping/_ping.py
+++ b/tobiko/shell/ping/_ping.py
@@ -15,9 +15,14 @@
 # under the License.
 
 from __future__ import absolute_import
 
+import glob
+import io
+import json
+import os
 import time
 import typing
+
 import netaddr
 from oslo_log import log
@@ -415,3 +420,77 @@ def handle_ping_unknow_host_error(text):
     if text.endswith(suffix):
         details = text[:-len(suffix)].strip().split()[-1]
         raise _exception.UnknowHostError(details=details)
+
+
+def ping_to_json(ping_result: _statistics.PingStatistics) -> str:
+    """Transform a PingStatistics object into a JSON string holding
+    the ping destination and its result."""
+    destination = str(ping_result.destination)
+    transmitted = ping_result.transmitted
+    received = ping_result.received
+    timestamp = time.ctime(ping_result.begin_interval)
+    ping_result_line_dict = {"destination": destination,
+                             "transmitted": transmitted,
+                             "received": received,
+                             "timestamp": timestamp}
+    return json.dumps(ping_result_line_dict)
+
+
+def write_ping_to_file(ping_ip=None, output_dir='tobiko_ping_results'):
+    """Ping a host via iter_statistics and record the statistics.
+
+    Results are written to output_dir; file names correlate with the
+    pinged VM floating IP."""
+    output_dir_path = f'{sh.get_user_home_dir()}/{output_dir}'
+    if not os.path.exists(output_dir_path):
+        os.makedirs(output_dir_path)
+    output_filename = f'ping_{ping_ip}.log'
+    output_path = os.path.join(output_dir_path, output_filename)
+    LOG.info(f'starting ping process to {ping_ip}, '
+             f'output file is: {output_path}')
+    ping_result_statistics = iter_statistics(parameters=None,
+                                             host=ping_ip, until=None,
+                                             timeout=99999,
+                                             check=True)
+    for ping_result in ping_result_statistics:
+        with open(output_path, "at") as ping_result_file:
+            ping_result_file.write(ping_to_json(ping_result) + "\n")
+        time.sleep(5)
+
+
+def get_vm_ping_log_files(glob_ping_log_pattern='tobiko_ping_results/ping_'
+                                                '*.log'):
+    """Yield the ping log files matching the given glob pattern."""
+    glob_path = f'{sh.get_user_home_dir()}/{glob_ping_log_pattern}'
+    for filename in glob.glob(glob_path):
+        LOG.info(f'found ping_vm_log file: {filename}')
+        yield filename
+
+
+def rename_ping_statistics_file_to_checked(filepath):
+    """Append '_checked' to a ping statistics file once its check is done."""
+    os.rename(filepath, f'{filepath}_checked')
+
+
+def check_ping_statistics(failure_limit=10):
+    """Iterate over the lines of every ping_vm_log file and fail if the
+    maximum number of ping failures has been reached for any FIP
+    (one file per FIP)."""
+    # iterate over ping_vm_log files:
+    for filename in list(get_vm_ping_log_files()):
+        with io.open(filename, 'rt') as fd:
+            LOG.info(f'checking ping log file: {filename}, '
+                     f'failure_limit is: {failure_limit}')
+            failure_counter = 0
+            for ping_line in fd.readlines():
+                ping_line = json.loads(ping_line.rstrip())
+                if ping_line['transmitted'] != ping_line['received']:
+                    failure_counter += 1
+                    LOG.debug(f'found ping failure to:'
+                              f' {ping_line["destination"]}')
+                if failure_counter >= failure_limit:
+                    rename_ping_statistics_file_to_checked(filename)
+                    tobiko.fail(f'{failure_counter} ping failures found '
+                                f'to vm fip destination: '
+                                f'{ping_line["destination"]}')
+        LOG.info(f'no failures in ping log file: {filename}')
+        rename_ping_statistics_file_to_checked(filename)
diff --git a/tobiko/shell/sh/__init__.py b/tobiko/shell/sh/__init__.py
index 0fa1ae897..87eb83bd2 100644
--- a/tobiko/shell/sh/__init__.py
+++ b/tobiko/shell/sh/__init__.py
@@ -24,6 +24,7 @@ from tobiko.shell.sh import _io
 from tobiko.shell.sh import _local
 from tobiko.shell.sh import _mktemp
 from tobiko.shell.sh import _nameservers
+from tobiko.shell.sh import _path
 from tobiko.shell.sh import _process
 from tobiko.shell.sh import _ps
 from tobiko.shell.sh import _reboot
@@ -71,6 +72,10 @@ ListNameserversFixture = _nameservers.ListNameserversFixture
 list_nameservers = _nameservers.list_nameservers
 
 process = _process.process
+start_background_process = _process.start_background_process
+check_or_start_background_process = \
+    _process.check_or_start_background_process
+get_user_home_dir = _path.get_user_home_dir
 str_from_stream = _process.str_from_stream
 
 ShellProcessFixture = _process.ShellProcessFixture
diff --git a/tobiko/shell/sh/_path.py b/tobiko/shell/sh/_path.py
index 35144ba5b..49f707410 100644
--- a/tobiko/shell/sh/_path.py
+++ b/tobiko/shell/sh/_path.py
@@ -16,6 +16,7 @@
 from __future__ import absolute_import
 
 import typing  # noqa
+from os.path import expanduser
 
 from oslo_log import log
@@ -25,6 +26,11 @@ import tobiko
 
 LOG = log.getLogger(__name__)
 
 
+def get_user_home_dir():
+    """Return the current user's home directory as a str path."""
+    return expanduser("~")
+
+
 class ExecutePathFixture(tobiko.SharedFixture):
 
     def __init__(self, executable_dirs=None, environ=None):
diff --git a/tobiko/shell/sh/_process.py b/tobiko/shell/sh/_process.py
index 61e6c90df..d68887af8 100644
--- a/tobiko/shell/sh/_process.py
+++ b/tobiko/shell/sh/_process.py
@@ -16,11 +16,17 @@
 from __future__ import absolute_import
 
 import io
+import os
+import time
 import typing  # noqa
+from multiprocessing import Process as MultiProcess
 
+import psutil
 from oslo_log import log
 
+
 import tobiko
+from tobiko.shell import sh
 from tobiko.shell.sh import _command
 from tobiko.shell.sh import _exception
 from tobiko.shell.sh import _io
@@ -457,3 +463,104 @@ def default_sudo_command():
 def network_namespace_command(network_namespace, command):
     return _command.shell_command(['/sbin/ip', 'netns', 'exec',
                                    network_namespace]) + command
+
+
+def start_background_process(bg_function=None, bg_process_name=None,
+                             **kwargs):
+    """Run a function in the background in a separate, detached process.
+
+    That process will continue to run even after Tobiko exits.
+    params:
+    bg_function: the function to run in the background
+    bg_process_name: name of the background process; it is also used to
+    name the file holding the process pids. Multiple processes can share
+    the same pid file, pids are appended to it.
+    outputs: appends the pid of the started process to the pid file
+    returns: None
+    """
+    # Define a parent process that is killed right after it starts the
+    # actual background process, orphaning it so it keeps running
+    # unattached in the background. This way the background process is
+    # not stopped when Tobiko exits.
+    def _background_process_parent():
+        p = MultiProcess(target=bg_function, name=bg_process_name,
+                         kwargs=kwargs)
+        p.start()
+        LOG.info(
+            f'Started background function: {bg_function.__name__}, process '
+            f'pid is: {p.pid}, process name: {bg_process_name}, '
+            f'main execution process continues...')
+        # append the bg_process pid to a file
+        bg_process_pids_file_name = f'{sh.get_user_home_dir()}/' \
+                                    f'{bg_process_name}_pids_file'
+        with open(bg_process_pids_file_name, "at") as bg_process_pid_file:
+            bg_process_pid_file.write(str(p.pid) + "\n")
+        LOG.debug(f'Writing pid: {p.pid} to pids file:'
+                  f' {bg_process_pids_file_name}')
+
+    # start the parent process, nested with a started child process,
+    # then kill the parent
+    d = MultiProcess(target=_background_process_parent)
+    d.daemon = False
+    d.start()
+    LOG.debug(f'Background process parent started, pid: {d.pid}')
+    time.sleep(1)
+    d.terminate()
+    LOG.debug(f'Background process orphaned, parent killed, parent pid:'
+              f' {d.pid}')
+
+
+def stop_process(pid_list):
+    """Stop (kill) every process in the given pid list."""
+    for pid in pid_list:
+        LOG.info(f'stopping process with pid: {pid}')
+        sh.execute(f'sudo kill -9 {pid}')
+
+
+def get_bg_procs_pids(bg_process_name):
+    """Return a list of pids of live processes recorded in the pid file
+    of the given bg_process_name."""
+    bg_process_pids_file_name = f'{sh.get_user_home_dir()}/' \
+                                f'{bg_process_name}_pids_file'
+    bg_process_name_pid_list = []
+    if os.path.isfile(bg_process_pids_file_name):
+        LOG.info(f'found previous background process file:'
+                 f' {bg_process_pids_file_name}, checking its processes.')
+        # go over the file's pids
+        with io.open(bg_process_pids_file_name, 'rt') as fd:
+            for line in fd.readlines():
+                pid = line.rstrip()
+                try:
+                    proc = psutil.Process(int(pid))
+                except (TypeError, ValueError, psutil.NoSuchProcess):
+                    # skip pids that are not valid ints or no longer exist
+                    continue
+                # keep only processes that are still alive (not zombies)
+                if proc.status() != psutil.STATUS_ZOMBIE:
+                    LOG.debug(f'process {pid} is still running, adding it '
+                              f'to the pid list')
+                    bg_process_name_pid_list.append(pid)
+    return bg_process_name_pid_list
+
+
+def check_or_start_background_process(bg_function=None,
+                                      bg_process_name=None,
+                                      check_function=None, **kwargs):
+    """Check whether the named background process is already running.
+
+    If it is, stop it and run the given check function over its results.
+    If it is not running, start a new background process running
+    bg_function.
+    params:
+    bg_function: function to run in the background
+    bg_process_name: process name
+    check_function: function that checks the process results"""
+    procs_running_list = get_bg_procs_pids(bg_process_name)
+    if procs_running_list:
+        stop_process(procs_running_list)
+        # execute the process check, i.e. go over the process results file
+        LOG.info(f'running a check function: {check_function} '
+                 f'on results of processes: {bg_process_name}')
+        check_function()
+    else:  # if a background process is not present, start one
+        LOG.info(f'No previous background processes found:'
+                 f' {bg_process_name}, starting a new background process '
+                 f'of function: {bg_function}')
+        start_background_process(bg_function=bg_function,
+                                 bg_process_name=bg_process_name, **kwargs)
diff --git a/tobiko/tests/faults/ha/test_cloud_recovery.py b/tobiko/tests/faults/ha/test_cloud_recovery.py
index 76b38858b..30158676a 100644
--- a/tobiko/tests/faults/ha/test_cloud_recovery.py
+++ b/tobiko/tests/faults/ha/test_cloud_recovery.py
@@ -124,6 +124,9 @@ class DisruptTripleoNodesTest(testtools.TestCase):
     def test_0vercloud_health_check(self):
         OvercloudHealthCheck.run_before(skip_mac_table_size_test=False)
 
+    def test_check_background_vm_ping(self):
+        nova.check_or_start_background_vm_ping()
+
     def test_hard_reboot_controllers_recovery(self):
         OvercloudHealthCheck.run_before()
         cloud_disruptions.reset_all_controller_nodes()
diff --git a/tobiko/tripleo/nova.py b/tobiko/tripleo/nova.py
index 5eb7628aa..28d844744 100644
--- a/tobiko/tripleo/nova.py
+++ b/tobiko/tripleo/nova.py
@@ -7,11 +7,13 @@
 from oslo_log import log
 import pandas
 
 import tobiko
+from tobiko import tripleo
 from tobiko.tripleo import overcloud
 from tobiko.shell import ping
 from tobiko.shell import sh
 from tobiko.openstack import nova
 from tobiko.openstack import topology
+from tobiko.openstack import stacks
 from tobiko.tripleo import containers
@@ -123,6 +125,7 @@ def check_ping_vm_fip(fip):
 def check_df_vms_ping(df):
     """input: dataframe with vms_ids
     try to ping all vms in df"""
+
     for vm_id in df.vm_id.to_list():
         check_ping_vm_fip(vm_floating_ip(vm_id))
@@ -218,3 +221,26 @@ def check_computes_vms_running_via_virsh():
         else:
             LOG.info(f"{vm_id} is not in running state on "
                      f"{compute.hostname}")
+
+
+def get_nova_server_floating_ip():
+    """Return the floating IP address of a running VM."""
+    return tobiko.setup_fixture(
+        stacks.CirrosServerStackFixture).floating_ip_address
+
+
+# Test is intended for a D/S environment
+@tripleo.skip_if_missing_overcloud
+def check_or_start_background_vm_ping():
+    """Check if the background ping process exists; if so, stop it and
+    check the ping health, otherwise start a new detached ping process.
+
+    Executes a background ping to a VM floating IP. This test is intended
+    to be run and picked up again by the next Tobiko run. Ping results are
+    parsed and a failure is raised if the number of ping failures is above
+    a certain threshold."""
+    ping_vm_fip = get_nova_server_floating_ip()
+    sh.check_or_start_background_process(
+        bg_function=ping.write_ping_to_file,
+        bg_process_name='tobiko_background_ping',
+        check_function=ping.check_ping_statistics,
+        ping_ip=ping_vm_fip)
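
Minimal usage sketch of the wiring introduced above (a reviewer's note, not part of the patch; the floating IP and the sample log line in the comments are illustrative values, not taken from a real run):

    from tobiko.shell import ping
    from tobiko.shell import sh

    # First call: no pid file exists yet under the user's home directory,
    # so write_ping_to_file(ping_ip=...) is started in a detached process
    # named 'tobiko_background_ping' (extra kwargs such as ping_ip are
    # forwarded to the background function). It appends one JSON line per
    # statistics sample to ~/tobiko_ping_results/ping_<fip>.log, e.g.:
    #   {"destination": "10.0.0.100", "transmitted": 5, "received": 5,
    #    "timestamp": "Mon Jan  1 00:00:00 2024"}
    # A later call finds the recorded pids, kills the detached process and
    # runs check_ping_statistics(), which calls tobiko.fail() if any log
    # file holds failure_limit (default 10) or more samples where
    # transmitted and received differ.
    sh.check_or_start_background_process(
        bg_function=ping.write_ping_to_file,
        bg_process_name='tobiko_background_ping',
        check_function=ping.check_ping_statistics,
        ping_ip='10.0.0.100')  # illustrative floating IP

check_or_start_background_vm_ping() in tobiko/tripleo/nova.py is exactly this call, with the floating IP taken from the CirrosServerStackFixture.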